diff --git a/3rd/pcre2/AUTHORS.md b/3rd/pcre2/AUTHORS.md new file mode 100644 index 00000000..708fc232 --- /dev/null +++ b/3rd/pcre2/AUTHORS.md @@ -0,0 +1,200 @@ +PCRE2 Authorship and Contributors +================================= + +COPYRIGHT +--------- + +Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for +copyright details. + + +MAINTAINERS +----------- + +The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel. + +Since 2024, the contributors with administrator access to the project are now +Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for +GPG keys. + +Both administrators are volunteers acting in a personal capacity. + + + + + + + + + + + + + + + + + + +
+<table> +<thead> + <tr> + <th>Name</th> + <th>Role</th> + </tr> +</thead> +<tbody> + <tr> + <td>
+ +Nicholas Wilson<br>
+`nicholas@nicholaswilson.me.uk`<br>
+Currently of Microsoft Research Cambridge, UK + +</td> + <td>
+ +* General project administration & maintenance +* Release management +* Code maintenance + +</td> + </tr> + <tr> + <td>
+ +Zoltán Herczeg<br>
+`hzmester@freemail.hu`<br>
+Currently of the University of Szeged, Hungary + +</td> + <td>
+ +* Code maintenance +* Ownership of `sljit` and PCRE2's JIT + +</td> + </tr> +</tbody> +</table>
+ + +CONTRIBUTORS +------------ + +Many others have participated and contributed to PCRE2 over its history. + +The maintainers are grateful for all contributions and participation over the +years. We apologise for any names we have forgotten. + +We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and +maintainer from 1997 to 2024. + +All names listed alphabetically. + +### Contributors to PCRE2 + +This list includes names up until the PCRE2 10.44 release. New names will be +added from the Git history on each release. + + Scott Bell + Carlo Marcelo Arenas Belón + Edward Betts + Jan-Willem Blokland + Ross Burton + Dmitry Cherniachenko + Alexey Chupahin + Jessica Clarke + Alejandro Colomar + Jeremie Courreges-Anglas + Addison Crump + Alex Dowad + Daniel Engberg + Daniel Richard G + David Gaussmann + Andrey Gorbachev + Jordan Griege + Jason Hood + Bumsu Hyeon + Roy Ivy + Martin Joerg + Guillem Jover + Ralf Junker + Ayesh Karunaratne + Michael Kaufmann + Yunho Kim + Joshua Kinard + David Korczynski + Uwe Korn + Jonas Kvinge + Kristian Larsson + Kai Lu + Behzod Mansurov + B. Scott Michel + Nathan Moinvaziri + Mike Munday + Marc Mutz + Fabio Pagani + Christian Persch + Tristan Ross + William A Rowe Jr + David Seifert + Yaakov Selkowitz + Rich Siegel + Karl Skomski + Maciej Sroczyński + Wolfgang Stöggl + Thomas Tempelmann + Greg Thain + Lucas Trzesniewski + Theodore Tsirpanis + Matthew Vernon + Rémi Verschelde + Thomas Voss + Ezekiel Warren + Carl Weaver + Chris Wilson + Amin Yahyaabadi + Joe Zhang + +### Contributors to PCRE1 + +These people contributed either by sending patches or reporting serious issues. 
+ + Irfan Adilovic + Alexander Barkov + Daniel Bergström + David Burgess + Ross Burton + David Byron + Fred Cox + Christian Ehrlicher + Tom Fortmann + Lionel Fourquaux + Mike Frysinger + Daniel Richard G + Dair Gran + "Graycode" (Red Hat Product Security) + Viktor Griph + Wen Guanxing + Robin Houston + Martin Jerabek + Peter Kankowski + Stephen Kelly + Yunho Kim + Joshua Kinard + Carsten Klein + Evgeny Kotkov + Ronald Landheer-Cieslak + Alan Lehotsky + Dmitry V. Levin + Nuno Lopes + Kai Lu + Giuseppe Maxia + Dan Mooney + Marc Mutz + Markus Oberhumer + Sheri Pierce + Petr Pisar + Ari Pollak + Bob Rossi + Ruiger Rill + Michael Shigorin + Rich Siegel + Craig Silverstein (C++ wrapper) + Karl Skomski + Paul Sokolovsky + Stan Switzer + Ian Taylor + Mark Tetrode + Jeff Trawick + Steven Van Ingelgem + Lawrence Velazquez + Jiong Wang + Stefan Weber + Chris Wilson + +Thanks go to Jeffrey Friedl for testing and debugging assistance. diff --git a/3rd/pcre2/BUILD.bazel b/3rd/pcre2/BUILD.bazel new file mode 100644 index 00000000..c975eadd --- /dev/null +++ b/3rd/pcre2/BUILD.bazel @@ -0,0 +1,172 @@ +load("@bazel_skylib//rules:copy_file.bzl", "copy_file") +load("@bazel_skylib//rules:native_binary.bzl", "native_test") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") + +copy_file( + name = "config_h_generic", + src = "src/config.h.generic", + out = "src/config.h", +) + +copy_file( + name = "pcre2_h_generic", + src = "src/pcre2.h.generic", + out = "src/pcre2.h", +) + +copy_file( + name = "pcre2_chartables_c", + src = "src/pcre2_chartables.c.dist", + out = "src/pcre2_chartables.c", +) + +# Removed src/pcre2_ucptables.c below because it is #included in +# src/pcre2_tables.c. Also fixed typo: ckdint should be chkdint. +# PH, 22-March-2023. 
+cc_library( + name = "pcre2", + srcs = [ + "src/pcre2_auto_possess.c", + "src/pcre2_chkdint.c", + "src/pcre2_compile.c", + "src/pcre2_compile_class.c", + "src/pcre2_config.c", + "src/pcre2_context.c", + "src/pcre2_convert.c", + "src/pcre2_dfa_match.c", + "src/pcre2_error.c", + "src/pcre2_extuni.c", + "src/pcre2_find_bracket.c", + "src/pcre2_jit_compile.c", + "src/pcre2_maketables.c", + "src/pcre2_match.c", + "src/pcre2_match_data.c", + "src/pcre2_newline.c", + "src/pcre2_ord2utf.c", + "src/pcre2_pattern_info.c", + "src/pcre2_script_run.c", + "src/pcre2_serialize.c", + "src/pcre2_string_utils.c", + "src/pcre2_study.c", + "src/pcre2_substitute.c", + "src/pcre2_substring.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + "src/pcre2_xclass.c", + ":pcre2_chartables_c", + "src/pcre2_compile.h", + "src/pcre2_internal.h", + "src/pcre2_intmodedep.h", + "src/pcre2_ucp.h", + "src/pcre2_util.h", + ":config_h_generic", + ], + textual_hdrs = [ + "src/pcre2_jit_match.c", + "src/pcre2_jit_misc.c", + "src/pcre2_ucptables.c", + ], + hdrs = [ + ":pcre2_h_generic", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "PCRE2_CODE_UNIT_WIDTH=8", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + ], + includes = ["src"], + strip_include_prefix = "src", + visibility = ["//visibility:public"], +) + +cc_library( + name = "pcre2-posix", + srcs = [ + "src/pcre2posix.c", + ":config_h_generic", + ], + hdrs = [ + "src/pcre2posix.h", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "PCRE2_CODE_UNIT_WIDTH=8", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + ], + includes = ["src"], + strip_include_prefix = "src", + visibility = ["//visibility:public"], + deps = [":pcre2"], +) + +# Totally weird issue in Bazel. It won't let you #include any files unless they +# are declared to the build system. OK, fair enough. But - for a cc_binary it +# uses the file extension to determine whether it's a header or a compilation +# unit. But... 
we have several .c files which are #included, rather than treated +# as a compilation unit. +# +# For cc_library() above, we can overcome this with textual_hdrs. But that +# doesn't work for cc_binary(). Here's our workaround. +# +# https://github.com/bazelbuild/bazel/issues/680 +cc_library( + name = "pcre2test_dotc_headers", + hdrs = [ + "src/pcre2_chkdint.c", + "src/pcre2_printint.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + "src/pcre2_valid_utf.c", + ], + strip_include_prefix = "src", + visibility = ["//visibility:private"], +) + +cc_binary( + name = "pcre2test", + srcs = [ + "src/pcre2test.c", + ":config_h_generic", + ], + local_defines = [ + "HAVE_CONFIG_H", + "HAVE_MEMMOVE", + "HAVE_STRERROR", + "PCRE2_STATIC", + "SUPPORT_UNICODE", + "SUPPORT_PCRE2_8", + ] + select({ + "@platforms//os:windows": [], + "//conditions:default": ["HAVE_UNISTD_H"], + }), + linkopts = select({ + "@platforms//os:windows": ["-STACK:2500000"], + "//conditions:default": [], + }), + visibility = ["//visibility:public"], + deps = [":pcre2test_dotc_headers", ":pcre2", ":pcre2-posix"], +) + +filegroup( + name = "testdata", + srcs = glob(["testdata/*"]), +) + +native_test( + name = "pcre2_test", + src = select({ + "@platforms//os:windows": "RunTest.bat", + "//conditions:default": "RunTest", + }), + out = select({ + "@platforms//os:windows": "RunTest.bat", + "//conditions:default": "RunTest", + }), + data = [":pcre2test", ":testdata"], + size = "small", +) \ No newline at end of file diff --git a/3rd/pcre2/CMakeLists.txt b/3rd/pcre2/CMakeLists.txt new file mode 100644 index 00000000..9e916174 --- /dev/null +++ b/3rd/pcre2/CMakeLists.txt @@ -0,0 +1,1403 @@ +# CMakeLists.txt +# +# This file enables PCRE2 to be built with the CMake configuration and build +# tool. Download CMake in source or binary form from http://www.cmake.org/ +# Converted to support PCRE2 from the original PCRE file, August 2014. 
+# +# Original listfile by Christian Ehrlicher +# Refined and expanded by Daniel Richard G. +# 2007-09-14 mod by Sheri so 7.4 supported configuration options can be entered +# 2007-09-19 Adjusted by PH to retain previous default settings +# 2007-12-26 (a) On UNIX, use names libpcre instead of just pcre +# (b) Ensure pcretest and pcregrep link with the local library, +# not a previously-installed one. +# (c) Add PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, and +# PCRE_SUPPORT_LIBBZ2. +# 2008-01-20 Brought up to date to include several new features by Christian +# Ehrlicher. +# 2008-01-22 Sheri added options for backward compatibility of library names +# when building with minGW: +# if "ON", NON_STANDARD_LIB_PREFIX causes shared libraries to +# be built without "lib" as prefix. (The libraries will be named +# pcre.dll, pcreposix.dll and pcrecpp.dll). +# if "ON", NON_STANDARD_LIB_SUFFIX causes shared libraries to +# be built with suffix of "-0.dll". (The libraries will be named +# libpcre-0.dll, libpcreposix-0.dll and libpcrecpp-0.dll - same names +# built by default with Configure and Make. +# 2008-01-23 PH removed the automatic build of pcredemo. +# 2008-04-22 PH modified READLINE support so it finds NCURSES when needed. +# 2008-07-03 PH updated for revised UCP property support (change of files) +# 2009-03-23 PH applied Steven Van Ingelgem's patch to change the name +# CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE +# is included within another project. +# 2009-03-23 PH applied a modified version of Steven Van Ingelgem's patches to +# add options to stop the building of pcregrep and the tests, and +# to disable the final configuration report. +# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that +# are set by specifying a release type. 
+# 2010-01-02 PH added test for stdint.h +# 2010-03-02 PH added test for inttypes.h +# 2011-08-01 PH added PCREGREP_BUFSIZE +# 2011-08-22 PH added PCRE_SUPPORT_JIT +# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov +# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT +# 2011-10-04 Sheri added support for including coff data in windows shared libraries +# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in +# the source dir by the user prior to building +# 2011-10-04 Sheri changed various add_test's to use exes' location built instead +# of DEBUG location only (likely only matters in MSVC) +# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and +# RunGrepTest (used for UNIX and Msys) +# 2011-10-04 Sheri added scripts to provide needed variables and to execute +# RunTest.bat in Win32 (for effortless testing with "make test") +# 2011-10-04 Sheri Increased minimum required cmake version +# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c +# 2012-01-10 Zoltan Herczeg added libpcre16 support +# 2012-01-13 Stephen Kelly added out of source build support +# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out +# of the configure.ac file +# 2012-02-26 PH added support for libedit +# 2012-09-06 PH added support for PCRE_EBCDIC_NL25 +# 2012-09-08 ChPe added PCRE32 support +# 2012-10-23 PH added support for VALGRIND and GCOV +# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings +# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and +# so it has been removed. +# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".") +# 2013-11-05 PH added support for PARENS_NEST_LIMIT +# 2014-08-29 PH converted the file for PCRE2 (which has no C++). 
+# 2015-04-24 PH added support for PCRE2_DEBUG +# 2015-07-16 PH updated for new pcre2_find_bracket source module +# 2015-08-24 PH correct C_FLAGS setting (patch from Roy Ivy III) +# 2015-10-16 PH added support for never-backslash-C +# 2016-03-01 PH applied Chris Wilson's patch for MSVC static +# 2016-06-24 PH applied Chris Wilson's second patch, putting the first under +# a new option instead of being unconditional. +# 2016-10-05 PH fixed a typo (PCRE should be PCRE2) in above patch +# fix by David Gaussmann +# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE +# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30 +# 2017-04-08 PH added HEAP_LIMIT +# 2017-06-15 ZH added SUPPORT_JIT_SEALLOC support +# 2018-06-19 PH added checks for stdint.h and inttypes.h (later removed) +# 2018-06-27 PH added Daniel's patch to increase the stack for MSVC +# 2018-11-14 PH removed unnecessary checks for stdint.h and inttypes.h +# 2018-11-16 PH added PCRE2GREP_SUPPORT_CALLOUT_FORK support and tidied +# 2019-02-16 PH hacked to avoid CMP0026 policy issue (see comments below) +# 2020-03-16 PH renamed dftables as pcre2_dftables (as elsewhere) +# 2020-03-24 PH changed CMAKE_MODULE_PATH definition to add, not replace +# 2020-04-08 Carlo added function check for secure_getenv, fixed strerror +# 2020-04-16 enh added check for __attribute__((uninitialized)) +# 2020-04-25 PH applied patches from Uwe Korn to support pkg-config and +# library versioning. +# 2020-04-25 Carlo added function check for mkostemp used in ProtExecAllocator +# 2020-04-28 PH added function check for memfd_create based on Carlo's patch +# 2020-05-25 PH added a check for Intel CET +# 2020-12-03 PH altered the definition of pcre2test as suggested by Daniel +# 2021-06-29 JWSB added the option to build static library with PIC. +# 2021-07-05 JWSB modified such that both the static and shared library can be +# built in one go. 
+# 2021-08-28 PH increased minimum version +# 2021-08-28 PH added test for realpath() +# 2022-12-10 PH added support for pcre2posix_test +# 2023-01-15 Carlo added C99 as the minimum required +# 2023-08-06 PH added support for setting variable length lookbehind maximum + +################################################################################ +# We have used `gersemi` for auto-formatting our CMake files. +# Applied to all CMake files using: +# > pip3 install gersemi +# > gersemi --in-place --line-length 120 --indent 2 \ +# ./CMakeLists.txt ./cmake/*.cmake ./cmake/*.cmake.in +################################################################################ + +# Increased minimum to 3.15 to allow use of string(REPEAT). +cmake_minimum_required(VERSION 3.15 FATAL_ERROR) +project(PCRE2 C) +set(CMAKE_C_STANDARD 99) +set(CMAKE_C_STANDARD_REQUIRED TRUE) + +set(CMAKE_C_VISIBILITY_PRESET hidden) +cmake_policy(SET CMP0063 NEW) + +# Set policy CMP0026 to avoid warnings for the use of LOCATION in +# GET_TARGET_PROPERTY. This should no longer be required. +# CMAKE_POLICY(SET CMP0026 OLD) + +# With a recent cmake, you can provide a rootdir to look for non +# standard installed library dependencies, but to do so, the policy +# needs to be set to new (by uncommenting the following) +# CMAKE_POLICY(SET CMP0074 NEW) + +# For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH +# on the command line. 
+# SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) + +include_directories(${PROJECT_SOURCE_DIR}/src) + +# external packages +find_package(BZip2) +find_package(ZLIB) +find_package(Readline) +find_package(Editline) + +# Configuration checks + +include(CheckCSourceCompiles) +include(CheckFunctionExists) +include(CheckSymbolExists) +include(CheckIncludeFile) +include(CheckTypeSize) +include(GNUInstallDirs) # for CMAKE_INSTALL_LIBDIR + +check_include_file(assert.h HAVE_ASSERT_H) +check_include_file(dirent.h HAVE_DIRENT_H) +check_include_file(sys/stat.h HAVE_SYS_STAT_H) +check_include_file(sys/types.h HAVE_SYS_TYPES_H) +check_include_file(unistd.h HAVE_UNISTD_H) +check_include_file(windows.h HAVE_WINDOWS_H) + +check_symbol_exists(bcopy "strings.h" HAVE_BCOPY) +check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) +check_symbol_exists(memmove "string.h" HAVE_MEMMOVE) +check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) +check_symbol_exists(strerror "string.h" HAVE_STRERROR) + +# The probe needs <stdlib.h> for realpath() and <limits.h> for PATH_MAX. +check_c_source_compiles( + [=[ + #include <stdlib.h> + #include <limits.h> + int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; } + ]=] + HAVE_REALPATH +) + +# The next two probes run with -Werror (except on MSVC and XL) so that an +# ignored attribute counts as "not supported"; the flags are restored after. +set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) +if(NOT MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "XL") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") +endif() + +check_c_source_compiles( + "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }" + HAVE_ATTRIBUTE_UNINITIALIZED +) + +check_c_source_compiles( + [=[ + extern __attribute__ ((visibility ("default"))) int f(void); + int main(void) { return f(); } + int f(void) { return 42; } + ]=] + HAVE_VISIBILITY +) + +set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS}) + +check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME) + +# The probe needs <stddef.h> for size_t. +check_c_source_compiles( + [=[ + #include <stddef.h> + int main(void) { int a,b; size_t m; 
__builtin_mul_overflow(a,b,&m); return 0; } + ]=] + HAVE_BUILTIN_MUL_OVERFLOW +) + +check_c_source_compiles( + "int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }" + HAVE_BUILTIN_UNREACHABLE +) + +if(HAVE_VISIBILITY) + set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=]) +else() + set(PCRE2_EXPORT) +endif() + +# Check whether Intel CET is enabled, and if so, adjust compiler flags. This +# code was written by PH, trying to imitate the logic from the autotools +# configuration. + +check_c_source_compiles( + [=[ + #ifndef __CET__ + #error CET is not enabled + #endif + int main() { return 0; } + ]=] + INTEL_CET_ENABLED +) + +if(INTEL_CET_ENABLED) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk") +endif() + +# User-configurable options +# +# Note: CMakeSetup displays these in alphabetical order, regardless of +# the order we use here. + +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries.") + +option(BUILD_STATIC_LIBS "Build static libraries." ON) + +option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON) + +option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF) + +option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF) + +option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF) + +set(PCRE2_DEBUG "IfDebugBuild" CACHE STRING "Include debugging code") +set_property(CACHE PCRE2_DEBUG PROPERTY STRINGS "IfDebugBuild" "ON" "OFF") + +option(PCRE2_DISABLE_PERCENT_ZT "Disable the use of %zu and %td (rarely needed)" OFF) + +set( + PCRE2_EBCDIC + OFF + CACHE BOOL + "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)" +) + +set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.") + +set( + PCRE2_LINK_SIZE + "2" + CACHE STRING + "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details." 
+) + +set( + PCRE2_PARENS_NEST_LIMIT + "250" + CACHE STRING + "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details." +) + +set( + PCRE2_HEAP_LIMIT + "20000000" + CACHE STRING + "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details." +) + +set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.") + +set( + PCRE2_MATCH_LIMIT + "10000000" + CACHE STRING + "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details." +) + +set( + PCRE2_MATCH_LIMIT_DEPTH + "MATCH_LIMIT" + CACHE STRING + "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details." +) + +set( + PCRE2GREP_BUFSIZE + "20480" + CACHE STRING + "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details." +) + +set( + PCRE2GREP_MAX_BUFSIZE + "1048576" + CACHE STRING + "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details." 
+) + +set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL).") + +set(PCRE2_HEAP_MATCH_RECURSE OFF CACHE BOOL "Obsolete option: do not use") + +set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.") + +if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).") +else() + set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE) +endif() + +set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.") + +set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.") + +set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.") + +set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.") + +set( + PCRE2_SUPPORT_BSR_ANYCRLF + OFF + CACHE BOOL + "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks" +) + +set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.") + +set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.") + +option(PCRE2_SHOW_REPORT "Show the final configuration report" ON) +option(PCRE2_BUILD_PCRE2GREP "Build pcre2grep" ON) +option(PCRE2_BUILD_TESTS "Build the tests" ON) + +set( + PCRE2_INSTALL_CMAKEDIR + "${CMAKE_INSTALL_LIBDIR}/cmake/pcre2" + CACHE STRING + "Path used during CMake install for placing PCRE2's CMake config files, relative to the installation root (prefix)" +) + +if(MINGW) + option( + NON_STANDARD_LIB_PREFIX + "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." + OFF + ) + + option( + NON_STANDARD_LIB_SUFFIX + "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." 
+ OFF + ) +endif() + +if(MSVC) + option(PCRE2_STATIC_RUNTIME "ON=Compile against the static runtime (/MT)." OFF) + option(INSTALL_MSVC_PDB "ON=Install .pdb files built by MSVC, if generated" OFF) +endif() + +# bzip2 lib +if(BZIP2_FOUND) + option(PCRE2_SUPPORT_LIBBZ2 "Enable support for linking pcre2grep with libbz2." ON) +endif() +if(PCRE2_SUPPORT_LIBBZ2) + include_directories(${BZIP2_INCLUDE_DIR}) +endif() + +# zlib +if(ZLIB_FOUND) + option(PCRE2_SUPPORT_LIBZ "Enable support for linking pcre2grep with libz." ON) +endif() +if(PCRE2_SUPPORT_LIBZ) + include_directories(${ZLIB_INCLUDE_DIR}) +endif() + +# editline lib +if(EDITLINE_FOUND) + option(PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF) +endif() +if(EDITLINE_FOUND) + if(PCRE2_SUPPORT_LIBEDIT) + include_directories(${EDITLINE_INCLUDE_DIR}) + endif() +else() + if(PCRE2_SUPPORT_LIBEDIT) + message( + FATAL_ERROR + " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n" + " or set Editline_ROOT to a full libedit installed tree, as needed\n" + " Might need to enable policy CMP0074 in CMakeLists.txt" + ) + endif() +endif() + +# readline lib +if(READLINE_FOUND) + option(PCRE2_SUPPORT_LIBREADLINE "Enable support for linking pcre2test with libreadline." 
ON) +endif() +if(PCRE2_SUPPORT_LIBREADLINE) + include_directories(${READLINE_INCLUDE_DIR}) +endif() + +# Prepare build configuration + +if(NOT BUILD_SHARED_LIBS AND NOT BUILD_STATIC_LIBS) + message(FATAL_ERROR "At least one of BUILD_SHARED_LIBS or BUILD_STATIC_LIBS must be enabled.") +endif() + +if(NOT PCRE2_BUILD_PCRE2_8 AND NOT PCRE2_BUILD_PCRE2_16 AND NOT PCRE2_BUILD_PCRE2_32) + message( + FATAL_ERROR + "At least one of PCRE2_BUILD_PCRE2_8, PCRE2_BUILD_PCRE2_16 or PCRE2_BUILD_PCRE2_32 must be enabled" + ) +endif() + +if(PCRE2_BUILD_PCRE2_8) + set(SUPPORT_PCRE2_8 1) +endif() + +if(PCRE2_BUILD_PCRE2_16) + set(SUPPORT_PCRE2_16 1) +endif() + +if(PCRE2_BUILD_PCRE2_32) + set(SUPPORT_PCRE2_32 1) +endif() + +if(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) + message(STATUS "** PCRE2_BUILD_PCRE2_8 must be enabled for the pcre2grep program") + set(PCRE2_BUILD_PCRE2GREP OFF) +endif() + +if(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) + if(READLINE_FOUND) + message( + FATAL_ERROR + " Only one of the readline compatible libraries can be enabled.\n" + " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" + ) + endif() +endif() + +if(PCRE2_SUPPORT_BSR_ANYCRLF) + set(BSR_ANYCRLF 1) +endif() + +if(PCRE2_NEVER_BACKSLASH_C) + set(NEVER_BACKSLASH_C 1) +endif() + +if(PCRE2_SUPPORT_UNICODE) + set(SUPPORT_UNICODE 1) +endif() + +if(PCRE2_SUPPORT_JIT) + set(SUPPORT_JIT 1) + if(UNIX) + find_package(Threads REQUIRED) + if(CMAKE_USE_PTHREADS_INIT) + set(REQUIRE_PTHREAD 1) + endif() + endif() +endif() + +if(PCRE2_SUPPORT_JIT_SEALLOC) + set(CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE) + check_symbol_exists(mkostemp stdlib.h REQUIRED) + unset(CMAKE_REQUIRED_DEFINITIONS) + if(${REQUIRED}) + if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD) + add_compile_definitions(_GNU_SOURCE) + set(SLJIT_PROT_EXECUTABLE_ALLOCATOR 1) + else() + message(FATAL_ERROR "Your configuration is not supported") + endif() + else() + set(PCRE2_SUPPORT_JIT_SEALLOC OFF) + endif() +endif() + 
+if(PCRE2GREP_SUPPORT_JIT) + set(SUPPORT_PCRE2GREP_JIT 1) +endif() + +if(PCRE2GREP_SUPPORT_CALLOUT) + set(SUPPORT_PCRE2GREP_CALLOUT 1) + if(PCRE2GREP_SUPPORT_CALLOUT_FORK) + set(SUPPORT_PCRE2GREP_CALLOUT_FORK 1) + endif() +endif() + +if(PCRE2_SUPPORT_VALGRIND) + set(SUPPORT_VALGRIND 1) +endif() + +if(PCRE2_DISABLE_PERCENT_ZT) + set(DISABLE_PERCENT_ZT 1) +endif() + +# This next one used to reference ${READLINE_LIBRARY}) +# but I was advised to add the NCURSES test as well, along with +# some modifications to cmake/FindReadline.cmake which should +# make it possible to override the default if necessary. PH + +if(PCRE2_SUPPORT_LIBREADLINE) + set(SUPPORT_LIBREADLINE 1) + set(PCRE2TEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY}) +endif() + +# libedit is a plug-compatible alternative to libreadline + +if(PCRE2_SUPPORT_LIBEDIT) + set(SUPPORT_LIBEDIT 1) + set(PCRE2TEST_LIBS ${EDITLINE_LIBRARY}) +endif() + +if(PCRE2_SUPPORT_LIBZ) + set(SUPPORT_LIBZ 1) + set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${ZLIB_LIBRARIES}) +endif() + +if(PCRE2_SUPPORT_LIBBZ2) + set(SUPPORT_LIBBZ2 1) + set(PCRE2GREP_LIBS ${PCRE2GREP_LIBS} ${BZIP2_LIBRARIES}) +endif() + +# Map the PCRE2_NEWLINE name to its numeric NEWLINE_DEFAULT code (1-6). An +# unrecognised name leaves NEWLINE_DEFAULT empty and is rejected below. +set(NEWLINE_DEFAULT "") + +if(PCRE2_NEWLINE STREQUAL "CR") + set(NEWLINE_DEFAULT "1") +endif() +if(PCRE2_NEWLINE STREQUAL "LF") + set(NEWLINE_DEFAULT "2") +endif() +if(PCRE2_NEWLINE STREQUAL "CRLF") + set(NEWLINE_DEFAULT "3") +endif() +if(PCRE2_NEWLINE STREQUAL "ANY") + set(NEWLINE_DEFAULT "4") +endif() +if(PCRE2_NEWLINE STREQUAL "ANYCRLF") + set(NEWLINE_DEFAULT "5") +endif() +if(PCRE2_NEWLINE STREQUAL "NUL") + set(NEWLINE_DEFAULT "6") +endif() + +if(NEWLINE_DEFAULT STREQUAL "") + message( + FATAL_ERROR + "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\"." 
+ ) +endif() + +if(PCRE2_EBCDIC) + set(EBCDIC 1) +endif() + +if(PCRE2_EBCDIC_NL25) + set(EBCDIC 1) + set(EBCDIC_NL25 1) +endif() + +# Output files + +configure_file(config-cmake.h.in ${PROJECT_BINARY_DIR}/config.h @ONLY) + +# Parse version numbers and date out of configure.ac + +file( + STRINGS + ${PROJECT_SOURCE_DIR}/configure.ac + configure_lines + LIMIT_COUNT + 50 # Read only the first 50 lines of the file +) + +set( + SEARCHED_VARIABLES + "pcre2_major" + "pcre2_minor" + "pcre2_prerelease" + "pcre2_date" + "libpcre2_posix_version" + "libpcre2_8_version" + "libpcre2_16_version" + "libpcre2_32_version" +) +# For each searched name, take the first m4_define(name, [value]) found and +# store the value in the upper-cased variable of the same name. +foreach(configure_line ${configure_lines}) + foreach(substitution_variable ${SEARCHED_VARIABLES}) + string(TOUPPER ${substitution_variable} substitution_variable_upper) + if(NOT ${substitution_variable_upper}) + string(REGEX MATCH "m4_define\\(${substitution_variable}, *\\[(.*)\\]" MATCHED_STRING ${configure_line}) + if(CMAKE_MATCH_1) + set(${substitution_variable_upper} ${CMAKE_MATCH_1}) + endif() + endif() + endforeach() +endforeach() + +# PARSE_LIB_VERSION(prefix): split the colon-separated prefix_VERSION into its +# CURRENT, REVISION and AGE fields (in that order), then derive +# prefix_SOVERSION (CURRENT - AGE) and the Mach-O compatibility and current +# versions (CURRENT + 1, with ".REVISION" appended to the current version). +macro(PARSE_LIB_VERSION variable_prefix) + string(REPLACE ":" ";" ${variable_prefix}_VERSION_LIST ${${variable_prefix}_VERSION}) + list(GET ${variable_prefix}_VERSION_LIST 0 ${variable_prefix}_VERSION_CURRENT) + list(GET ${variable_prefix}_VERSION_LIST 1 ${variable_prefix}_VERSION_REVISION) + list(GET ${variable_prefix}_VERSION_LIST 2 ${variable_prefix}_VERSION_AGE) + + math(EXPR ${variable_prefix}_SOVERSION "${${variable_prefix}_VERSION_CURRENT} - ${${variable_prefix}_VERSION_AGE}") + math(EXPR ${variable_prefix}_MACHO_COMPATIBILITY_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1") + math(EXPR ${variable_prefix}_MACHO_CURRENT_VERSION "${${variable_prefix}_VERSION_CURRENT} + 1") + set( + ${variable_prefix}_MACHO_CURRENT_VERSION + "${${variable_prefix}_MACHO_CURRENT_VERSION}.${${variable_prefix}_VERSION_REVISION}" + ) + set( + ${variable_prefix}_VERSION + 
"${${variable_prefix}_SOVERSION}.${${variable_prefix}_VERSION_AGE}.${${variable_prefix}_VERSION_REVISION}" + ) +endmacro() + +parse_lib_version(LIBPCRE2_POSIX) +parse_lib_version(LIBPCRE2_8) +parse_lib_version(LIBPCRE2_16) +parse_lib_version(LIBPCRE2_32) + +configure_file(src/pcre2.h.in ${PROJECT_BINARY_DIR}/pcre2.h @ONLY) + +# Make sure to not link debug libs +# against release libs and vice versa +if(WIN32) + set(CMAKE_DEBUG_POSTFIX "d") +endif() + +# Character table generation + +option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF) +if(PCRE2_REBUILD_CHARTABLES) + add_executable(pcre2_dftables src/pcre2_dftables.c) + add_custom_command( + OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c + COMMAND pcre2_dftables + ARGS ${PROJECT_BINARY_DIR}/pcre2_chartables.c + DEPENDS pcre2_dftables + COMMENT "Generating character tables (pcre2_chartables.c) for current locale" + VERBATIM + ) +else() + configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY) +endif() + +# Source code + +set(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h) + +set( + PCRE2_SOURCES + src/pcre2_auto_possess.c + ${PROJECT_BINARY_DIR}/pcre2_chartables.c + src/pcre2_chkdint.c + src/pcre2_compile.c + src/pcre2_compile_class.c + src/pcre2_config.c + src/pcre2_context.c + src/pcre2_convert.c + src/pcre2_dfa_match.c + src/pcre2_error.c + src/pcre2_extuni.c + src/pcre2_find_bracket.c + src/pcre2_jit_compile.c + src/pcre2_maketables.c + src/pcre2_match.c + src/pcre2_match_data.c + src/pcre2_newline.c + src/pcre2_ord2utf.c + src/pcre2_pattern_info.c + src/pcre2_script_run.c + src/pcre2_serialize.c + src/pcre2_string_utils.c + src/pcre2_study.c + src/pcre2_substitute.c + src/pcre2_substring.c + src/pcre2_tables.c + src/pcre2_ucd.c + src/pcre2_valid_utf.c + src/pcre2_xclass.c +) + +set(PCRE2POSIX_HEADERS src/pcre2posix.h) +set(PCRE2POSIX_SOURCES src/pcre2posix.c) + +if(MINGW AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + 
add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/pcre2.o PRE-LINK + COMMAND windres + ARGS pcre2.rc pcre2.o + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Using pcre2 coff info in mingw build" + ) + set(PCRE2_SOURCES ${PCRE2_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2.o) + endif() + + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/pcre2posix.o PRE-LINK + COMMAND windres + ARGS pcre2posix.rc pcre2posix.o + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + COMMENT "Using pcre2posix coff info in mingw build" + ) + set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcre2posix.o) + endif() +endif() + +if(MSVC AND BUILD_SHARED_LIBS) + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc) + set(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc) + endif() + + if(EXISTS ${PROJECT_SOURCE_DIR}/pcre2posix.rc) + set(PCRE2POSIX_SOURCES ${PCRE2POSIX_SOURCES} pcre2posix.rc) + endif() +endif() + +# Fix static compilation with MSVC: https://bugs.exim.org/show_bug.cgi?id=1681 +# This code was taken from the CMake wiki, not from WebM. 
+ +if(MSVC AND PCRE2_STATIC_RUNTIME) + message(STATUS "** MSVC and PCRE2_STATIC_RUNTIME: modifying compiler flags to use static runtime library") + foreach( + flag_var + CMAKE_C_FLAGS + CMAKE_C_FLAGS_DEBUG + CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL + CMAKE_C_FLAGS_RELWITHDEBINFO + ) + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endforeach() +endif() + +# Build setup + +add_compile_definitions(HAVE_CONFIG_H) + +if(PCRE2_DEBUG STREQUAL "IfDebugBuild") + add_compile_definitions("$<$<CONFIG:Debug>:PCRE2_DEBUG>") +elseif(PCRE2_DEBUG) + add_compile_definitions("PCRE2_DEBUG") +endif() + +if(MSVC) + add_compile_definitions(_CRT_SECURE_NO_DEPRECATE _CRT_SECURE_NO_WARNINGS) +endif() + +set(CMAKE_INCLUDE_CURRENT_DIR 1) + +set(TARGETS) + +# 8-bit library + +if(PCRE2_BUILD_PCRE2_8) + if(BUILD_STATIC_LIBS) + add_library(pcre2-8-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + set_target_properties( + pcre2-8-static + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + ) + target_compile_definitions(pcre2-8-static PUBLIC PCRE2_STATIC) + target_include_directories(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR}) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-8-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-8-static) + add_library(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + set_target_properties( + pcre2-posix-static + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + ) + target_link_libraries(pcre2-posix-static pcre2-8-static) + 
target_include_directories(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src) + set(TARGETS ${TARGETS} pcre2-posix-static) + + if(MSVC) + set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static) + set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix-static) + else() + set_target_properties(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8) + set_target_properties(pcre2-posix-static PROPERTIES OUTPUT_NAME pcre2-posix) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-8-static pcre2-posix-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() + + if(BUILD_SHARED_LIBS) + add_library(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-8-shared + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_8_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_8_VERSION} + SOVERSION ${LIBPCRE2_8_SOVERSION} + OUTPUT_NAME pcre2-8 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-8-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-8-shared) + set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-8-shared>/pcre2-8d.pdb ${DLL_PDB_DEBUG_FILES}) + + add_library(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) + target_include_directories(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src) + set_target_properties( + pcre2-posix-shared + PROPERTIES + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_POSIX_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_POSIX_VERSION} + SOVERSION ${LIBPCRE2_POSIX_SOVERSION} + OUTPUT_NAME pcre2-posix + ) + set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED") + 
target_compile_definitions(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG}) + target_link_libraries(pcre2-posix-shared pcre2-8-shared) + set(TARGETS ${TARGETS} pcre2-posix-shared) + set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posix.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-posix-shared>/pcre2-posixd.pdb ${DLL_PDB_DEBUG_FILES}) + + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-8-shared pcre2-posix-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() + + if(BUILD_STATIC_LIBS) + add_library(pcre2-8 ALIAS pcre2-8-static) + add_library(pcre2-posix ALIAS pcre2-posix-static) + else() + add_library(pcre2-8 ALIAS pcre2-8-shared) + add_library(pcre2-posix ALIAS pcre2-posix-shared) + endif() +endif() + +# 16-bit library + +if(PCRE2_BUILD_PCRE2_16) + if(BUILD_STATIC_LIBS) + add_library(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-16-static + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + ) + target_compile_definitions(pcre2-16-static PUBLIC PCRE2_STATIC) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-16-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-16-static) + + if(MSVC) + set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static) + else() + set_target_properties(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-16-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() + + if(BUILD_SHARED_LIBS) + add_library(pcre2-16-shared SHARED ${PCRE2_HEADERS} 
${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-16-shared + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_16_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_16_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_16_VERSION} + SOVERSION ${LIBPCRE2_16_SOVERSION} + OUTPUT_NAME pcre2-16 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-16-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-16-shared) + set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-16-shared>/pcre2-16d.pdb ${DLL_PDB_DEBUG_FILES}) + + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-16-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-16-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() + + if(BUILD_STATIC_LIBS) + add_library(pcre2-16 ALIAS pcre2-16-static) + else() + add_library(pcre2-16 ALIAS pcre2-16-shared) + endif() +endif() + +# 32-bit library + +if(PCRE2_BUILD_PCRE2_32) + if(BUILD_STATIC_LIBS) + add_library(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-32-static + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + ) + target_compile_definitions(pcre2-32-static PUBLIC PCRE2_STATIC) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-32-static Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-32-static) + + if(MSVC) + set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static) + 
else() + set_target_properties(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32) + endif() + if(PCRE2_STATIC_PIC) + set_target_properties(pcre2-32-static PROPERTIES POSITION_INDEPENDENT_CODE 1) + endif() + endif() + + if(BUILD_SHARED_LIBS) + add_library(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) + target_include_directories(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR}) + set_target_properties( + pcre2-32-shared + PROPERTIES + UNITY_BUILD OFF + COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32 + MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}" + MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}" + VERSION ${LIBPCRE2_32_VERSION} + SOVERSION ${LIBPCRE2_32_SOVERSION} + OUTPUT_NAME pcre2-32 + ) + if(REQUIRE_PTHREAD) + target_link_libraries(pcre2-32-shared Threads::Threads) + endif() + set(TARGETS ${TARGETS} pcre2-32-shared) + set(DLL_PDB_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32.pdb ${DLL_PDB_FILES}) + set(DLL_PDB_DEBUG_FILES $<TARGET_PDB_FILE_DIR:pcre2-32-shared>/pcre2-32d.pdb ${DLL_PDB_DEBUG_FILES}) + + if(MINGW) + if(NON_STANDARD_LIB_PREFIX) + set_target_properties(pcre2-32-shared PROPERTIES PREFIX "") + endif() + if(NON_STANDARD_LIB_SUFFIX) + set_target_properties(pcre2-32-shared PROPERTIES SUFFIX "-0.dll") + endif() + endif() + endif() + + if(BUILD_STATIC_LIBS) + add_library(pcre2-32 ALIAS pcre2-32-static) + else() + add_library(pcre2-32 ALIAS pcre2-32-shared) + endif() +endif() + +# Generate pkg-config files + +set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}") +set(prefix ${CMAKE_INSTALL_PREFIX}) +set(exec_prefix "\${prefix}") +set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") +set(includedir "\${prefix}/include") +if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug)) + set(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX}) +endif() + +if(PCRE2_BUILD_PCRE2_8) + configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc") + configure_file(libpcre2-8.pc.in libpcre2-8.pc @ONLY) + list(APPEND 
pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc") + set(enable_pcre2_8 "yes") +else() + set(enable_pcre2_8 "no") +endif() + +if(PCRE2_BUILD_PCRE2_16) + configure_file(libpcre2-16.pc.in libpcre2-16.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc") + set(enable_pcre2_16 "yes") +else() + set(enable_pcre2_16 "no") +endif() + +if(PCRE2_BUILD_PCRE2_32) + configure_file(libpcre2-32.pc.in libpcre2-32.pc @ONLY) + list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc") + set(enable_pcre2_32 "yes") +else() + set(enable_pcre2_32 "no") +endif() + +configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF) + +# Executables + +if(PCRE2_BUILD_PCRE2GREP) + add_executable(pcre2grep src/pcre2grep.c) + set_property(TARGET pcre2grep PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + set(TARGETS ${TARGETS} pcre2grep) + target_link_libraries(pcre2grep pcre2-posix ${PCRE2GREP_LIBS}) +endif() + +# Testing + +if(PCRE2_BUILD_TESTS) + enable_testing() + + set(PCRE2TEST_SOURCES src/pcre2test.c) + + if(MSVC) + # This is needed to avoid a stack overflow error in the standard tests. The + # flag should be indicated with a forward-slash instead of a hyphen, but + # then CMake treats it as a file path. 
+ set(PCRE2TEST_LINKER_FLAGS -STACK:2500000) + endif() + + add_executable(pcre2test ${PCRE2TEST_SOURCES}) + set(TARGETS ${TARGETS} pcre2test) + if(PCRE2_BUILD_PCRE2_8) + list(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8) + endif() + if(PCRE2_BUILD_PCRE2_16) + list(APPEND PCRE2TEST_LIBS pcre2-16) + endif() + if(PCRE2_BUILD_PCRE2_32) + list(APPEND PCRE2TEST_LIBS pcre2-32) + endif() + target_link_libraries(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS}) + + if(PCRE2_BUILD_PCRE2_8) + add_executable(pcre2posix_test src/pcre2posix_test.c) + target_link_libraries(pcre2posix_test pcre2-posix pcre2-8) + endif() + + if(PCRE2_SUPPORT_JIT) + add_executable(pcre2_jit_test src/pcre2_jit_test.c) + set(PCRE2_JIT_TEST_LIBS) + if(PCRE2_BUILD_PCRE2_8) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-8) + endif() + if(PCRE2_BUILD_PCRE2_16) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-16) + endif() + if(PCRE2_BUILD_PCRE2_32) + list(APPEND PCRE2_JIT_TEST_LIBS pcre2-32) + endif() + target_link_libraries(pcre2_jit_test ${PCRE2_JIT_TEST_LIBS}) + endif() + + # ================================================= + # Write out a CTest configuration file + # + file( + WRITE + ${PROJECT_BINARY_DIR}/CTestCustom.ctest + "# This is a generated file. +MESSAGE(\"When testing is complete, review test output in the +\\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\") +MESSAGE(\" \") +" + ) + + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_test.sh + "#! /bin/sh +# This is a generated file. +srcdir=${PROJECT_SOURCE_DIR} +pcre2test=${PROJECT_BINARY_DIR}/pcre2test +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test +. ${PROJECT_SOURCE_DIR}/RunTest +if test \"$?\" != \"0\"; then exit 1; fi +# End +" + ) + + if(UNIX) + add_test(pcre2_test sh ${PROJECT_BINARY_DIR}/pcre2_test.sh) + endif() + + if(PCRE2_BUILD_PCRE2GREP) + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh + "#! /bin/sh +# This is a generated file. 
+srcdir=${PROJECT_SOURCE_DIR} +pcre2grep=${PROJECT_BINARY_DIR}/pcre2grep +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2grep=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2grep +pcre2test=${PROJECT_BINARY_DIR}/pcre2test +test -z \"$CMAKE_CONFIG_TYPE\" || pcre2test=${PROJECT_BINARY_DIR}/$CMAKE_CONFIG_TYPE/pcre2test +. ${PROJECT_SOURCE_DIR}/RunGrepTest +if test \"$?\" != \"0\"; then exit 1; fi +# End +" + ) + + if(UNIX) + add_test(pcre2_grep_test sh ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + endif() + endif() + + if(WIN32) + # Provide environment for executing the bat file version of RunTest + file(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc) + file(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin) + + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_test.bat + "\@REM This is a generated file. +\@echo off +setlocal +SET srcdir=\"${winsrc}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" +call %srcdir%\\RunTest.bat +if errorlevel 1 exit /b 1 +echo RunTest.bat tests successfully completed +" + ) + + add_test(NAME pcre2_test_bat COMMAND pcre2_test.bat) + set_tests_properties(pcre2_test_bat PROPERTIES PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed") + + if(PCRE2_BUILD_PCRE2GREP) + file( + WRITE + ${PROJECT_BINARY_DIR}/pcre2_grep_test.bat + "\@REM This is a generated file. 
+\@echo off +setlocal +SET srcdir=\"${winsrc}\" +SET pcre2test=\"${winbin}\\pcre2test.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2test=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2test.exe\" +SET pcre2grep=\"${winbin}\\pcre2grep.exe\" +if not [%CMAKE_CONFIG_TYPE%]==[] SET pcre2grep=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcre2grep.exe\" +call %srcdir%\\RunGrepTest.bat +if errorlevel 1 exit /b 1 +echo RunGrepTest.bat tests successfully completed +" + ) + + add_test(NAME pcre2_grep_test_bat COMMAND pcre2_grep_test.bat) + set_tests_properties( + pcre2_grep_test_bat + PROPERTIES PASS_REGULAR_EXPRESSION "RunGrepTest\\.bat tests successfully completed" + ) + endif() + + if("$ENV{OSTYPE}" STREQUAL "msys") + # Both the sh and bat file versions of RunTest are run if make test is used + # in msys + add_test(pcre2_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre2_test.sh) + if(PCRE2_BUILD_PCRE2GREP) + add_test(pcre2_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh) + endif() + endif() + endif() + + # Changed to accommodate testing whichever location was just built + + if(PCRE2_SUPPORT_JIT) + add_test(pcre2_jit_test pcre2_jit_test) + endif() + + if(PCRE2_BUILD_PCRE2_8) + add_test(pcre2posix_test pcre2posix_test) + endif() +endif() + +# Installation + +set(CMAKE_INSTALL_ALWAYS 1) + +install( + TARGETS ${TARGETS} + RUNTIME DESTINATION bin + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +install(FILES ${pkg_config_files} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2-config" + DESTINATION bin + # Set 0755 permissions + PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE +) + +install(FILES ${PCRE2_HEADERS} ${PCRE2POSIX_HEADERS} DESTINATION include) + +# CMake config files. 
+set(PCRE2_CONFIG_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config.cmake.in) +set(PCRE2_CONFIG_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config.cmake) +configure_file(${PCRE2_CONFIG_IN} ${PCRE2_CONFIG_OUT} @ONLY) +set(PCRE2_CONFIG_VERSION_IN ${CMAKE_CURRENT_SOURCE_DIR}/cmake/pcre2-config-version.cmake.in) +set(PCRE2_CONFIG_VERSION_OUT ${CMAKE_CURRENT_BINARY_DIR}/cmake/pcre2-config-version.cmake) +configure_file(${PCRE2_CONFIG_VERSION_IN} ${PCRE2_CONFIG_VERSION_OUT} @ONLY) +install(FILES ${PCRE2_CONFIG_OUT} ${PCRE2_CONFIG_VERSION_OUT} DESTINATION "${PCRE2_INSTALL_CMAKEDIR}") + +file(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html ${PROJECT_SOURCE_DIR}/doc/html/*.txt) +file( + GLOB txts + ${PROJECT_SOURCE_DIR}/doc/*.txt + AUTHORS.md + COPYING + ChangeLog + LICENCE.md + NEWS + README + SECURITY.md +) +file(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) +file(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3) + +install(FILES ${man1} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) +install(FILES ${man3} DESTINATION ${CMAKE_INSTALL_MANDIR}/man3) +install(FILES ${txts} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2) +install(FILES ${html} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/pcre2/html) + +if(MSVC AND INSTALL_MSVC_PDB) + install(FILES ${DLL_PDB_FILES} DESTINATION bin CONFIGURATIONS RelWithDebInfo) + install(FILES ${DLL_PDB_DEBUG_FILES} DESTINATION bin CONFIGURATIONS Debug) +endif() + +# Help, only for nice output +if(BUILD_STATIC_LIBS) + set(BUILD_STATIC_LIBS ON) +else() + set(BUILD_STATIC_LIBS OFF) +endif() + +if(PCRE2_HEAP_MATCH_RECURSE) + message(WARNING "HEAP_MATCH_RECURSE is obsolete and does nothing.") +endif() + +if(PCRE2_SHOW_REPORT) + message(STATUS "") + message(STATUS "") + message(STATUS "PCRE2-${PCRE2_MAJOR}.${PCRE2_MINOR} configuration summary:") + message(STATUS "") + message(STATUS " Install prefix .................... : ${CMAKE_INSTALL_PREFIX}") + message(STATUS " C compiler ........................ 
: ${CMAKE_C_COMPILER}") + + if(CMAKE_C_FLAGS) + set(CFSP " ") + endif() + if(CMAKE_CONFIGURATION_TYPES) + foreach(config IN LISTS CMAKE_CONFIGURATION_TYPES) + string(TOUPPER "${config}" buildtype) + string(LENGTH " (${config})" buildtypelen) + math(EXPR dotslen "18 - ${buildtypelen}") + string(REPEAT "." ${dotslen} dots) + message(STATUS " C compiler flags (${config}) ${dots} : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}") + endforeach() + else() + string(TOUPPER "${CMAKE_BUILD_TYPE}" buildtype) + message(STATUS " C compiler flags .................. : ${CMAKE_C_FLAGS}${CFSP}${CMAKE_C_FLAGS_${buildtype}}") + endif() + + message(STATUS "") + if(CMAKE_CONFIGURATION_TYPES) + message(STATUS " Build configurations .............. : ${CMAKE_CONFIGURATION_TYPES}") + else() + message(STATUS " Build type ........................ : ${CMAKE_BUILD_TYPE}") + endif() + message(STATUS " Build 8 bit PCRE2 library ......... : ${PCRE2_BUILD_PCRE2_8}") + message(STATUS " Build 16 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_16}") + message(STATUS " Build 32 bit PCRE2 library ........ : ${PCRE2_BUILD_PCRE2_32}") + message(STATUS " Include debugging code ............ : ${PCRE2_DEBUG}") + message(STATUS " Enable JIT compiling support ...... : ${PCRE2_SUPPORT_JIT}") + message(STATUS " Use SELinux allocator in JIT ...... : ${PCRE2_SUPPORT_JIT_SEALLOC}") + message(STATUS " Enable Unicode support ............ : ${PCRE2_SUPPORT_UNICODE}") + message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}") + message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}") + message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}") + message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}") + message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}") + message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}") + message(STATUS " Internal link size ................ 
: ${PCRE2_LINK_SIZE}") + message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}") + message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}") + message(STATUS " Heap limit ........................ : ${PCRE2_HEAP_LIMIT}") + message(STATUS " Match limit ....................... : ${PCRE2_MATCH_LIMIT}") + message(STATUS " Match depth limit ................. : ${PCRE2_MATCH_LIMIT_DEPTH}") + message(STATUS " Build shared libs ................. : ${BUILD_SHARED_LIBS}") + message(STATUS " Build static libs ................. : ${BUILD_STATIC_LIBS}") + message(STATUS " with PIC enabled ............... : ${PCRE2_STATIC_PIC}") + message(STATUS " Build pcre2grep ................... : ${PCRE2_BUILD_PCRE2GREP}") + message(STATUS " Enable JIT in pcre2grep ........... : ${PCRE2GREP_SUPPORT_JIT}") + message(STATUS " Enable callouts in pcre2grep ...... : ${PCRE2GREP_SUPPORT_CALLOUT}") + message(STATUS " Enable callout fork in pcre2grep .. : ${PCRE2GREP_SUPPORT_CALLOUT_FORK}") + message(STATUS " Buffer size for pcre2grep ......... : ${PCRE2GREP_BUFSIZE}") + message(STATUS " Build tests (implies pcre2test .... : ${PCRE2_BUILD_TESTS}") + message(STATUS " and pcre2grep)") + if(ZLIB_FOUND) + message(STATUS " Link pcre2grep with libz .......... : ${PCRE2_SUPPORT_LIBZ}") + else() + message(STATUS " Link pcre2grep with libz .......... : Library not found") + endif() + if(BZIP2_FOUND) + message(STATUS " Link pcre2grep with libbz2 ........ : ${PCRE2_SUPPORT_LIBBZ2}") + else() + message(STATUS " Link pcre2grep with libbz2 ........ : Library not found") + endif() + if(EDITLINE_FOUND) + message(STATUS " Link pcre2test with libeditline ... : ${PCRE2_SUPPORT_LIBEDIT}") + else() + message(STATUS " Link pcre2test with libeditline ... : Library not found") + endif() + if(READLINE_FOUND) + message(STATUS " Link pcre2test with libreadline ... : ${PCRE2_SUPPORT_LIBREADLINE}") + else() + message(STATUS " Link pcre2test with libreadline ... 
: Library not found") + endif() + message(STATUS " Support Valgrind .................. : ${PCRE2_SUPPORT_VALGRIND}") + if(PCRE2_DISABLE_PERCENT_ZT) + message(STATUS " Use %zu and %td ................... : OFF") + else() + message(STATUS " Use %zu and %td ................... : AUTO") + endif() + + if(MINGW AND BUILD_SHARED_LIBS) + message(STATUS " Non-standard dll names (prefix) ... : ${NON_STANDARD_LIB_PREFIX}") + message(STATUS " Non-standard dll names (suffix) ... : ${NON_STANDARD_LIB_SUFFIX}") + endif() + + if(MSVC) + message(STATUS " Install MSVC .pdb files ........... : ${INSTALL_MSVC_PDB}") + endif() + + message(STATUS "") +endif() + +# end CMakeLists.txt diff --git a/3rd/pcre2/COPYING b/3rd/pcre2/COPYING new file mode 100644 index 00000000..c233950f --- /dev/null +++ b/3rd/pcre2/COPYING @@ -0,0 +1,5 @@ +PCRE2 LICENCE + +Please see the file LICENCE in the PCRE2 distribution for licensing details. + +End diff --git a/3rd/pcre2/ChangeLog b/3rd/pcre2/ChangeLog new file mode 100644 index 00000000..5217d078 --- /dev/null +++ b/3rd/pcre2/ChangeLog @@ -0,0 +1,3285 @@ +Change Log for PCRE2 +-------------------- + +Before the move to GitHub, this was the only record of changes to PCRE2. Now +there is also the log of commit messages. + +Internal changes which are not visible to clients of the library are mostly not +listed here. + +Version 10.45 05-February-2025 +------------------------------ + +1. (#418) Change 6 of 10.44 broke 32-bit tests because pcre2test's reporting of +memory size was changed to the entire compiled data block, instead of just the +pattern and tables data, so as to align with the new length restriction. +Because the block's header contains pointers, this meant the pcre2test output +was different in 32-bit mode. A patch by Carlo reverts to the previous state +and makes sure that any limit set by pcre2_set_max_pattern_compiled_length() +also avoids the internal struct overhead. + +2. (#416, #622) Updates to build.zig. + +3. (#427, et al.) 
Various fixes to pacify static analyzers. + +4. (#428) Add --posix-pattern-file to pcre2grep to allow processing of empty +patterns through the -f option, as well as patterns that end in space +characters, for compatibility with other grep tools. + +5. (4fa5b8bd) Fix a bug in the fuzz support quantifier-limiting code. It ignores +strings of more than 5 digits because they are necessarily numbers greater than +65535, the largest legal quantifier. However, it wasn't ignoring non-significant +leading zeros. + +6. (6d82f0cd) The case-independent processing of the letter-matching Unicode +properties Ll, Lt, and Lu has been changed to match Perl (which changed a while +ago). When caseless matching is in force, all three of these properties are now +treated as Lc (cased letter). + +7. (#433) The pcre2_jit_compile() function was updated by the addition of a new +option PCRE2_JIT_TEST_ALLOC which, if called with a NULL first argument, tests +not only the availability of JIT, but also its ability to allocate executable +memory. Update pcre2test to use this support to extend the -C option. + +8. (75b1025a) The code for parsing Unicode property descriptions for \p and \P +has been changed as follows: + + . White space etc. before ^ in a negated value such as \p{ ^L } was not being + ignored. + + . The code wouldn't have worked if PCRE2 was compiled for UTF-8 support + within an EBCDIC environment. Possibly nobody does this any more, but it + should now work. + + . The documentation of the syntax of what can follow \p and \P has been + updated. + +9. (1c24ba01) There was an error in the table of lengths for parsed items for +the OPTIONS item, but fortuitously it could never have actually bitten. While +fixing this, some other code that could never be obeyed was discovered and +removed. + +10. 
(674b6640) Removed some incorrect optimization code from DFA matching that +has been there since PCRE1, but has just been found to cause a no match return +instead of a partial match in some cases. It involves partial matching when (*F) +is present so is unlikely to have actually affected anyone. + +11. (b0f4ac17) Tidy the wording and formatting of some pcre2test error messages +concerned with bad modifiers. Also restrict single-letter modifier sequences to +the first item in a modifier list, as documented and always intended. + +12. (1415565c) An iterator at the end of many assertions can always be +auto-possessified, but not at the end of variable-length lookbehinds. There was +a bug in the code that checks for such a lookbehind; it was looking only at the +first branch, which is wrong because some branches can be fixed length when +others are not, for example (?<=AB|CD?). Now all branches are checked for +variability. + +13. (ead08288) Matching with pcre2_match() could give an incorrect result if a +variable-length lookbehind was used as the condition in a conditional group. +The condition could erroneously be treated as true if a branch matched but +overran the current position. This bug was in the interpreter only; matching +with JIT was correct. + +14. (#443) Split out the sljit sub-project into a "Git submodule". Git users +must now run `git submodule init; git submodule update` after a Git checkout, or +the build will fail due to missing files in deps/sljit. + +15. (#441) Add a new error code (PCRE2_ERROR_JIT_UNSUPPORTED) which is yielded +for unsupported jit features. + +16. (#444) Fix bug in 'first code unit' and 'last code unit' optimization +combined with lookahead assertions. + +17. (#445, #447, #449, #451, #452, #459, #563) Add a new feature called scan +substring. This feature is a new type of assertion which matches the content of +a capturing block to a sub-pattern. + +18. 
(#450) Improvements to 'first code unit' / 'starting code units' +optimisation. + +19. (#455) Many, many improvements to the JIT compiler. + +20. Item 43 of 10.43 was incomplete because it addressed only \z and not \Z, +which was still misbehaving when matching fragments inside invalid UTF strings. + +21. (d29e7290) Octal escapes of the form \045 or \111 were not being recognized +in substitution strings, and if encountered gave an error, though the \o{...} +form was recognized. This bug is now fixed. + +22. (#463, #487) Fix 1 byte out-of-bounds read when parsing malformed limits +(e.g. LIMIT_HEAP) + +23. Many improvements to test infrastructure. Many more platforms and +configurations are now run in Continuous Integration, and all the platforms now +run the full test suite, rather than a partial subset. + +24. (#475) Implement title casing in substitution strings using Perl syntax. + +25. (#478, #504) Disallow \x if not followed by { or a hex digit. + +26. (#473) Implements Python-style backrefs in substitutions. + +27. (#472) Fix error reporting for certain over-large octal escapes. + +28. (#482) Fix parsing of named captures in replacement strings, allowing +non-ASCII capture names to be used. + +29. (#477, #474, #488, #494, #496, #506, #508, #511, #518, #524, #540) Many +improvements to parsing and optimising of character classes. + +30. (#483, #498) Add support for \g<NAME> and $<NAME> to replacement strings. + +31. (#470) Add option flags PCRE2_EXTRA_NO_BS0 and PCRE2_EXTRA_PYTHON_OCTAL. + +32. (#471) Add new API function pcre2_set_optimize() for controlling which +optimizations are enabled. + +33. (#491) Adds $& $` $' and $_ to substitution replacements, as well as +interpreting \b and \v as characters. + +34. (#499) Add option PCRE2_EXTRA_NEVER_CALLOUT to disable callouts. + +35. (#503, #513) Update Unicode support to UCD 16. + +36. 
(#512, #618, #638) Add new function pcre2_set_substitute_case_callout() to +allow clients to provide a custom callback with locale-aware case +transformation. + +37. (#516) Fix case-insensitive matching of backreferences when using the +PCRE2_EXTRA_CASELESS_RESTRICT option. + +38. (#519) In pcre2grep, add $& as an alias for $0 + +39. (c9bf8339, #534) Updated perltest.sh to enable locale setting. + +40. (#521) Add support for Turkish I casefolding, using new options +PCRE2_EXTRA_TURKISH_CASING, and added pre-pattern flags (*TURKISH_CASING) and +(*CASELESS_RESTRICT). + +41. (#523, #546, #547) Add support for UTS#18 compatible character classes, +using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a metacharacter +within character classes and the operators '&&', '--' and '~~', allowing +subtractions and intersections of character classes to be easily expressed. + +42. (#553, #586, #596, #597) Add support for Perl-style extended character +classes, using the syntax (?[...]). This also allows expressing subtractions and +intersections of character classes, but using a different syntax to UTS#18. + +43. (#554) Fixed a bug in JIT affecting greedy bounded repeats. The upper limit +of repeats inside a repeated bracket might be incorrectly checked. + +44. (#556) Fixed a bug in JIT affecting caseful matching of backreferences. When +utf is disabled, and dupnames is enabled, caseless matching was used even +if caseful matching was needed. + +45. (f34fc0a3) Fixed a bug in pcre2grep reported by Alejandro Colomar + (GitHub issue #577). In certain cases, when lines of above and +below context were contiguous, a separator line was incorrectly being inserted. + +46. (#594) Fix a small (one/two byte) out-of-bounds read on invalid UTF-8 input +in pcre2grep. + +47. (#370) Fix the INSTALL_MSVC_PDB CMake flag. + +48. (#366) Install cmake files in prefix/lib/cmake/pcre2 rather than +prefix/cmake. The new CMake flag PCRE2_INSTALL_CMAKEDIR allows customising this +location. 
+ +49. (#624, #626, #628, #632, #639, #641) Reduce code size of generated JIT code +for repeated character classes. + +50. (#623) Update the Bazel build files. + + +Version 10.44 07-June-2024 +-------------------------- + +1. If a pattern contained a variable-length lookbehind in which the first +branch was not the one with the shortest minimum length, and the lookbehind +contained a capturing group, and elsewhere in the pattern there was another +lookbehind that referenced that group, the pattern was incorrectly compiled, +leading to unpredictable results, including crashes in JIT compiling. An +example pattern is: /(((?<=123?456456|ABC)))(?<=\2)/ + +2. Further updates to the oss-fuzz support: + + (a) Limit quantifiers for groups and classes to be no more than 10. This + avoids very long JIT compile times that happen in some cases when groups + are replicated for quantification, and very long match times when + classes contain a lot of non-ascii characters. + + (b) Added PCRE2_EXTENDED_MORE to the list of allowed options. + + (c) Arranged for text error messages to be shown in 16-bit and 32-bit modes. + + (d) Made the output in standalone mode more readable. + + (e) General code tidies. + + (f) Limit the size of compiled patterns to 10MB (see 6 below). + + (g) Do not run JIT on patterns whose compiled length is greater than 200K + bytes because this takes a long time, causing oss-fuzz to time out. + + (h) Avoid compiling or matching twice with the same options (this could + happen if the input didn't set any options). + +3. Increase the maximum length of a name for a group from 32 to 128 because +there is a user for whom 32 is too small. + +4. Cause pcre2test to output a message when pcre2_jit_compile() gives an error +return if either jitverify or info is specified. + +5. Some auxiliary files for building under OpenVMS that were contributed by +Alexey Chupahin have been installed. + +6. 
Added pcre2_set_max_pattern_compiled_length() to limit the size of compiled +patterns. + +7. There was a bug in the implementation of \X caused by my (PH) misreading or +misunderstanding one of the grapheme sequence breaking rules in Unicode Annex +#29. A break should occur between two characters with the Extended Pictographic +break property unless a zero-width joiner intervenes. PCRE2 was not insisting +on the ZWJ, causing \X to match more than it should. See GitHub issue #410. + +8. Avoid compilation issues with proprietary compilers in UNIX since 10.43. + + +Version 10.43 16-February-2024 +------------------------------ + +1. The test program added by change 2 of 10.42 didn't work when the default +newline setting didn't include \n as a newline. One test needed (*LF) to ensure +that it worked. + +2. Added the new freestanding POSIX test program to the ManyConfigTests script +in the maint directory (overlooked in 2 below). Also improved the selection +facilities in that script, and added a test with JIT in a non-source directory, +fixing an oversight that would have made such a test fail before. + +3. Added pcre2_get_match_data_heapframes_size() and related pcre2test flags +to allow for finer control of the heap used when pcre2_match() without JIT is +used and the match_data might be reused. This began as PR #191, but has had +further refinement and documentation edits. + +4. Applied PR #181, which tidies some casts in pcre2_valid_utf.c. + +5. Applied PR #184, which avoids overflow issues with the heap limit +(introduced in 10.41/9). + +6. Applied PR #192, which changes the timing units for pcre2test from +milliseconds to microseconds. This is more useful for modern CPUs. + +7. Applied PR #193, which makes the requirement for C99 explicit in +configure.ac and CMakeLists.txt. + +8. Fixed a bug in pcre2test when a ridiculously large string repeat required a +stupid amount of memory. It now gives a clean realloc() failure error. + +9. 
Updates to restrict the interaction between ASCII and non-ASCII characters +for caseless matching and items like \d: + + (a) Added PCRE2_EXTRA_CASELESS_RESTRICT to lock out mixing of ASCII and + non-ASCII when matching caselessly. This is also /r in pcre2test and + (?r) within patterns. + + (b) Added PCRE2_EXTRA_ASCII_{BSD,BSS,BSW,POSIX} and corresponding (?aD) etc + in patterns and /a in pcre2test. + + (c) Corresponding updates to pcre2test. + +10. Unicode has been updated to 15.0.0. + +11. The Python scripts and ucptest.c in maint have been updated: (a) a minor +change needed for 9(a) above; (b) bug fixes in ucptest. + +12. Integer overflow testing is now centralized in a new function. + +13. Made PCRE2_UCP the default in UTF mode in pcre2grep, and added new options +--case-restrict and --no-ucp. + +14. In the debugging printint module (which is normally only linked into +pcre2test), avoid the use of a variable called "not" because that's deprecated +in C and forbidden in C++. Also rewrite some code to avoid a goto into a block +that bypassed its initialization (though it didn't actually matter). + +15. More minor code adjustments to avoid using reserved C++ words as variable +names ("new" and "typename") and another jump that bypassed an (irrelevant) +initialization. + +16. Merged a pull request that removed pcre2_ucptables.c from the list of files +to compile in NON-AUTOTOOLS-BUILD because it is #included in pcre2_tables.c. +Also adjusted the BUILD.bazel and build.zig files, which had the same issue. At +the same time, fixed a typo in the Bazel file. + +17. Add PCRE2_EXTRA_ASCII_DIGIT to allow [:digit:] to be kept in sync with \d +even in UCP mode. + +18. Fix an invalid match of ascii word classes when invalid utf is enabled. + +19. Add a --posix-digit to pcre2grep for compatibility with GNU grep, and +other tools that prefer the POSIX compatible unicode definition for \d. + +20. Report the bit width of the library in use by pcre2test for usability. + +21. 
A pathological pattern conversion test could result in a string longer than +the available input buffer. Cause such a test to fail. + +22. Add a check that forces a compiler error if PCRE2_CODE_UNIT_WIDTH is not 8, +16, or 32 when compiling any of the library modules. + +23. Update pcre2_compile() to treat a NULL pattern with zero length as an empty +string. + +24. Add support for limited-length variable-length lookbehind assertions, with +default maximum length 255 characters (same as Perl) but with a function to +adjust the limit. + +25. Applied pull request #262, which updates the zig configuration, and #278 +which fixes a bug with out-of-source-tree CMake build testing. + +26. Add support for LoongArch to JIT. + +27. Fixed a bug in pcre2_match() in the code for handling the vector of +backtracking frames on the heap, which caused a heap overflow if *LIMIT_HEAP +restricted an attempt to extend to less than the frame size. Generally tidy up +the code for extending the heap frames vector. This fixes GitHub issue #275. + +28. Update pcre2_fuzzsupport.c to avoid clang sanitize complaint about shifting +left by 16 when there are non-zeros in the top 16 bits. + +29. Perl 5.34.0 changed the meaning of (for example) {,3} which did not use to +be treated as a quantifier. Now it is interpreted as {0,3} and PCRE2 has +changed to match. Note that {,} is still not a quantifier. + +30. Perl allows spaces and/or horizontal tabs after { or before } in all items +that use braces, and also before or after the comma in quantifiers. PCRE2 now +does the same, except for \u{...}, which is recognized only when +PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript, non-Perl compatible, +extension, so PCRE2 follows ECMAScript rather than Perl. + +31. Applied pull request #300 by Carlo, which fixes #261. The bug was that +pcre2_match() was not fully resetting all captures that had been set within a +(possibly recursive) subroutine call such as (?3). + +32. 
Changed the meaning of \w (and its synonyms) in UCP mode to match Perl. It +now matches characters whose general categories are L or N or whose particular +categories are Mn (non-spacing mark) or Pc (connector punctuation). The latter +includes underscore. + +33. Changed the meaning of [:xdigit:] in UCP mode to match Perl. It now also +matches the "fullwidth" versions of the hex digits. Just like it is done for +[:digit:], PCRE2_EXTRA_ASCII_DIGIT can be used to keep this class ASCII only +without affecting other POSIX classes. + +34. GitHub PR305 fixes a potential integer overflow in pcre2_dfa_match(). + +35. Updated handling of \b and \B in UCP mode to match the changes to \w in 32 +above because \b and \B are defined in terms of \w. + +36. Within a pattern (?aT) and (?-aT) set and reset the PCRE2_EXTRA_ASCII_DIGIT +option, and (?aP) also sets (?aT) so that (?-aP) disables all ASCII +restrictions on POSIX classes. + +37. If PCRE2_FIRSTLINE was set on an anchored pattern, pcre2_match() and +pcre2_dfa_match() misbehaved. PCRE2_FIRSTLINE is now ignored for anchored +patterns. + +38. Add a test for ridiculous ovector offset values to the substring extraction +functions. + +39. Make OP_REVERSE use IMM2_SIZE for its data instead of LINK_SIZE, for +consistency with OP_VREVERSE. + +40. In some legacy environments with a pre C99 snprintf, pcre2_regerror could +return an incorrect value when the provided buffer was too small. + +41. Applied pull request #342 which adds sanity checks for ctype functions and +locks out any accidental sign-extension. + +42. In the 32-bit library, in non-UTF mode, a quantifier that followed a +literal character with a value greater than or equal to 0x80000000u caused +undefined behaviour. + +43. \z was misbehaving when matching fragments inside invalid UTF strings. + +44. Implement --group-separator and --no-group-separator for pcre2grep. + +45. Fix \X matching in 32 bit mode without UTF in JIT. + +46. 
Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set in JIT. + +47. Refactor the handling of whole-pattern recursion (?0) in pcre2_match() so +that its end is handled similarly to other recursions. This has altered the +behaviour of /|(?0)./endanchored which was previously not right. + +48. Improved the test for looping recursion by checking the last referenced +character as well as the current character. This allows some patterns that +previously triggered the check to run to completion instead of giving the loop +error. + +49. In 32-bit mode, the compiler looped for the pattern /[\x{ffffffff}]/ when +PCRE2_CASELESS and PCRE2_UCP (but not PCRE2_UTF) were set. Fixed by not trying +to look for other cases for characters above the Unicode range. + +50. In caseless 32-bit mode with UCP (but not UTF) set, the character +0xffffffff incorrectly matched any character that has more than one other case, +in particular k and s. + +51. Fix accept and endanchored interaction in JIT. + +52. Fix backreferences with unset backref and non-greedy iterators in JIT. + +53. Improve the logic that checks for a list of starting code units -- positive +lookahead assertions are now ignored if the immediately following item is one +that sets a mandatory starting character. For example, /a?(?=bc|)d/ used to set +all of a, b, and d as possible starting code units; now it sets only a and d. + +54. Fix incorrect class character matches in JIT. + +55. In pcre2test, ensure pcre2_jit_match() is used when jitfast is used with +substitution testing. + +56. Insert omitted setting of subject length in match data at the end of +pcre2_jit_match(). + +57. Implemented PCRE2_DISABLE_RECURSELOOP_CHECK for pcre2_match() to enable +some apparently looping recursions to run to completion and therefore match the +JIT behaviour. With this set, real loops will eventually get caught by match or +heap limits or run out of resource. + +58. 
AC did a lot of work on pcre2_fuzzsupport.c to extend it to 16-bit and +32-bit libraries and to compare JIT and non-JIT matching. + + +Version 10.42 11-December-2022 +------------------------------ + +1. Change 19 of 10.41 wasn't quite right; it put the definition of a default, +empty value for PCRE2_CALL_CONVENTION in src/pcre2posix.c instead of +src/pcre2posix.h, which meant that programs that included pcre2posix.h but not +pcre2.h failed to compile. + +2. To catch similar issues to the above in future, a new small test program +that includes pcre2posix.h but not pcre2.h has been added to the test suite. + +3. When the -S option of pcre2test was used to set a stack size greater than +the allowed maximum, the error message displayed the hard limit incorrectly. +This was pointed out on GitHub pull request #171, but the suggested patch +didn't cope with all cases. Some further modification was required. + +4. Supplying an ovector count of more than 65535 to pcre2_match_data_create() +caused a crash because the field in the match data block is only 16 bits. A +maximum of 65535 is now silently applied. + +5. Merged @carenas patch #175 which fixes #86 - segfault on aarch64 (ARM). + +6. The prototype for pcre2_substring_list_free() specified its argument as +PCRE2_SPTR * which is a const data type, whereas the yield from +pcre2_substring_list() is not const. This caused compiler warnings. I have +changed the argument of pcre2_substring_list_free() to be PCRE2_UCHAR ** to +remove this anomaly. This might cause new warnings in existing code where a +cast has been used to avoid previous ones. + + +Version 10.41 06-December-2022 +------------------------------ + +1. Add fflush() before and after a fork callout in pcre2grep to get its output +to be the same on all systems. (There were previously ordering differences in +Alpine Linux). + +2. Merged patch from @carenas (GitHub #110) for pthreads support in CMake. + +3. 
SSF scorecards grumbled about possible overflow in an expression in +pcre2test. It never would have overflowed in practice, but some casts have been +added and at the same time there's been some tidying of fprints that output +size_t values. + +4. PR #94 showed up an unused enum in pcre2_convert.c, which is now removed. + +5. Minor code re-arrangement to remove gcc warning about realloc() in +pcre2test. + +6. Change a number of int variables that hold buffer and line lengths in +pcre2grep to PCRE2_SIZE (aka size_t). + +7. Added an #ifdef to cut out a call to PRIV(jit_free) when JIT is not +supported (even though that function would do nothing in that case) at the +request of a user who doesn't even want to link with pcre_jit_compile.o. Also +tidied up an untidy #ifdef arrangement in pcre2test. + +8. Fixed an issue in the backtracking optimization of character repeats in +JIT. Furthermore optimize star repetitions, not just plus repetitions. + +9. Removed the use of an initial backtracking frames vector on the system stack +in pcre2_match() so that it now always uses the heap. (In a multi-thread +environment with very small stacks there had been an issue.) This also is +tidier for JIT matching, which didn't need that vector. The heap vector is now +remembered in the match data block and re-used if that block itself is re-used. +It is freed with the match data block. + +10. Adjusted the find_limits code in pcre2test to work with change 9 above. + +11. Added find_limits_noheap to pcre2test, because the heap limits are now +different in different environments and so cannot be included in the standard +tests. + +12. Created a test for pcre2_match() heap processing that is not part of the +tests run by 'make check', but can be run manually. The current output is from +a 64-bit system. + +13. Implemented -Z aka --null in pcre2grep. + +14. A minor change to pcre2test and the addition of several new pcre2grep tests +have improved LCOV coverage statistics. 
At the same time, code in pcre2grep and +elsewhere that can never be obeyed in normal testing has been excluded from +coverage. + +15. Fixed a bug in pcre2grep that could cause an extra newline to be written +after output generated by --output. + +16. If a file has a .bz2 extension but is not in fact compressed, pcre2grep +should process it as a plain text file. A bug stopped this happening; now fixed +and added to the tests. + +17. When pcre2grep was running not in UTF mode, if a string specified by +--output or obtained from a callout in a pattern contained a character (byte) +greater than 127, it was incorrectly output in UTF-8 format. + +18. Added some casts after warnings from Clang sanitize. + +19. Merged patch from cbouc (GitHub #139): 4 function prototypes were missing +PCRE2_CALL_CONVENTION in src/pcre2posix.h. All function prototypes returning +pointers had out of place PCRE2_CALL_CONVENTION in src/pcre2.h.*. These +produced errors when building for Windows with #define PCRE2_CALL_CONVENTION +__stdcall. + +20. A negative repeat value in a pcre2test subject line was not being +diagnosed, leading to infinite looping. + +21. Updated RunGrepTest to discard the warning that Bash now gives when setting +LC_CTYPE to a bad value (because older versions didn't). + +22. Updated pcre2grep so that it behaves like GNU grep when matching more than +one pattern and a later pattern matches at an earlier point in the subject when +the matched substrings are being identified by colour or by offsets. + +23. Updated the PrepareRelease script so that the man page that it makes for +the pcre2demo demonstration program is more standard and does not cause errors +when processed by lexgrog or mandb -c (GitHub issue #160). + +24. The JIT compiler was updated. + + +Version 10.40 15-April-2022 +--------------------------- + +1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect +handling of multiple passes. + +2. 
Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue +in pcre2grep with buffered fseek(stdin). + +3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is +not supported. + +4. Revert an unintended change in JIT repeat detection. + +5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd. + +6. Merged documentation and comments patches from @carenas (GitHub #47). + +7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code +from pcre2grep. + +8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46. + +9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and +substituting. + +10. Add null_subject and null_replacement modifiers to pcre2test. + +11. Add check for NULL subject to POSIX regexec() function. + +12. Add check for NULL replacement to pcre2_substitute(). + +13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and +pcre2_substitute(), and the replacement argument of the latter, if the pointer +is NULL and the length is zero, treat as an empty string. Apparently a number +of applications treat NULL/0 in this way. + +14. Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +15. Fix some minor issues raised by clang sanitize. + +16. Very minor code speed up for maximizing character property matches. + +17. A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. 
+ + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +18. The Python scripts in the maint directory have been refactored. There are +now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c +(which is #included by pcre2_tables.c). The data lists that used to be +duplicated are now held in a single common Python module. + +19. On CHERI, and thus Arm's Morello prototype, pointers are represented as +hardware capabilities, which consist of both an integer address and additional +metadata, meaning they are twice the size of the platform's size_t type, i.e. +16 bytes on a 64-bit system. The ovector member of heapframe happens to only be +8 byte aligned, and so computing frame_size ended up with a multiple of 8 but +not 16. Whilst the first frame was always suitably aligned, this then +misaligned the frame that follows, resulting in an alignment fault when storing +a pointer to Fecode at the start of match. Patch to fix this issue by Jessica +Clarke PR#72. + +20. Added -LP and -LS listing options to pcre2test. + +21. A user discovered that the library names in CMakeLists.txt for MSVC +debugger (PDB) files were incorrect - perhaps never tried for PCRE2? + +22. An item such as [Aa] is optimized into a caseless single character match. +When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a +pattern, the optimizing "must be present for a match" character check was not +being flagged as caseless, causing some matches that should have succeeded to +fail. + +23. Fixed a unicode property matching issue in JIT. The character was not +fully read in caseless matching. + +24. Fixed an issue affecting recursions in JIT caused by duplicated data +transfers. + +25. 
Merged patch from @carenas (GitHub #96) which fixes some problems with +pcre2test and readline/readedit: + + * Use the right header for libedit in FreeBSD with autoconf + * Really allow libedit with cmake + * Avoid using readline headers with libedit + + +Version 10.39 29-October-2021 +----------------------------- + +1. Fix incorrect detection of alternatives in first character search in JIT. + +2. Merged patch from @carenas (GitHub #28): + + Visual Studio 2013 includes support for %zu and %td, so let newer + versions of it avoid the fallback, and while at it, make sure that + the first check is for DISABLE_PERCENT_ZT so it will be always + honoured if chosen. + + ptrdiff_t is signed, so use a signed type instead, and make sure + that an appropriate width is chosen if pointers are 64bit wide and + long is not (ex: Windows 64bit). + + IMHO removing the cast (and therefore the possibility of truncation) + makes the code cleaner and the fallback is likely portable enough + with all 64-bit POSIX systems doing LP64 except for Windows. + +3. Merged patch from @carenas (GitHub #29) to update to Unicode 14.0.0. + +4. Merged patch from @carenas (GitHub #30): + + * Cleanup: remove references to no longer used stdint.h + + Since 19c50b9d (Unconditionally use inttypes.h instead of trying for stdint.h + (simplification) and remove the now unnecessary inclusion in + pcre2_internal.h., 2018-11-14), stdint.h is no longer used. + + Remove checks for it in autotools and CMake and document better the expected + build failures for systems that might have stdint.h (C99) and not inttypes.h + (from POSIX), like old Windows. + + * Cleanup: remove detection for inttypes.h which is a hard dependency + + CMake checks for standard headers are not meant to be used for hard + dependencies, so will prevent a possible fallback to work. 
+ + Alternatively, the header could be checked to make the configuration fail + instead of breaking the build, but that was punted, as it was missing anyway + from autotools. + +5. Merged patch from @carenas (GitHub #32): + + * jit: allow building with ancient MSVC versions + + Visual Studio older than 2013 fails to build with JIT enabled, because it is + unable to parse non C89 compatible syntax, with mixed declarations and code. + While most recent compilers wouldn't even report this as a warning since it + is valid C99, it could be also made visible by adding to gcc/clang the + -Wdeclaration-after-statement flag at build time. + + Move the code below the affected definitions. + + * pcre2grep: avoid mixing declarations with code + + Since d5a61ee8 (Patch to detect (and ignore) symlink loops in pcre2grep, + 2021-08-28), code will fail to build in a strict C89 compiler. + + Reformat slightly to make it C89 compatible again. + + +Version 10.38 01-October-2021 +----------------------------- + +1. Fix invalid single character repetition issues in JIT when the repetition +is inside a capturing bracket and the bracket is preceded by character +literals. + +2. Installed revised CMake configuration files provided by Jan-Willem Blokland. +This extends the CMake build system to build both static and shared libraries +in one go, builds the static library with PIC, and exposes PCRE2 libraries +using the CMake config files. JWB provided these notes: + +- Introduced CMake variable BUILD_STATIC_LIBS to build the static library. + +- Make a small modification to config-cmake.h.in by removing the PCRE2_STATIC + variable. Added PCRE2_STATIC variable to the static build using the + target_compile_definitions() function. + +- Extended the CMake config files. + + - Introduced CMake variable PCRE2_USE_STATIC_LIBS to easily switch between + the static and shared libraries. + + - Added the PCRE_STATIC variable to the target compile definitions for the + import of the static library. 
+ +Building static and shared libraries using MSVC results in a name clash of +the libraries. Both static and shared library builds create, for example, the +file pcre2-8.lib. Therefore, I decided to change the static library names by +adding "-static". For example, pcre2-8.lib has become pcre2-8-static.lib. +[Comment by PH: this is MSVC-specific. It doesn't happen on Linux.] + +3. Increased the minimum release number for CMake to 3.0.0 because older than +2.8.12 is deprecated (it was set to 2.8.5) and causes warnings. Even 3.0.0 is +quite old; it was released in 2014. + +4. Implemented a modified version of Thomas Tempelmann's pcre2grep patch for +detecting symlink loops. This is dependent on the availability of realpath(), +which is now tested for in ./configure and CMakeLists.txt. + +5. Implemented a modified version of Thomas Tempelmann's patch for faster +case-independent "first code unit" searches for unanchored patterns in 8-bit +mode in the interpreters. Instead of just remembering whether one case matched +or not, it remembers the position of a previous match so as to avoid +unnecessary repeated searching. + +6. Perl now locks out \K in lookarounds, so PCRE2 now does the same by default. +However, just in case anybody was relying on the old behaviour, there is an +option called PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK that enables the old behaviour. +An option has also been added to pcre2grep to enable this. + +7. Re-enable a JIT optimization which was unintentionally disabled in 10.35. + +8. There is a loop counter to catch excessively crazy patterns when checking +the lengths of lookbehinds at compile time. This was incorrectly getting reset +whenever a lookahead was processed, leading to some fuzzer-generated patterns +taking a very long time to compile when (?|) was present in the pattern, +because (?|) disables caching of group lengths. + + +Version 10.37 26-May-2021 +------------------------- + +1. 
Change RunGrepTest to use tr instead of sed when testing with binary +zero bytes, because sed varies a lot from system to system and has problems +with binary zeros. This is from Bugzilla #2681. Patch from Jeremie +Courreges-Anglas via Nam Nguyen. This fixes RunGrepTest for OpenBSD. Later: +it broke it for at least one version of Solaris, where tr can't handle binary +zeros. However, that system had /usr/xpg4/bin/tr installed, which works OK, so +RunGrepTest now checks for that command and uses it if found. + +2. Compiling with gcc 10.2's -fanalyzer option showed up a hypothetical problem +with a NULL dereference. I don't think this case could ever occur in practice, +but I have put in a check in order to get rid of the compiler error. + +3. An alternative patch for CMakeLists.txt because 10.36 #4 breaks CMake on +Windows. Patch from email@cs-ware.de fixes bugzilla #2688. + +4. Two bugs related to over-large numbers have been fixed so the behaviour is +now the same as Perl. + + (a) A pattern such as /\214748364/ gave an overflow error instead of being + treated as the octal number \214 followed by literal digits. + + (b) A sequence such as {65536 that has no terminating } so is not a + quantifier was nevertheless complaining that a quantifier number was too big. + +5. A run of autoconf suggested that configure.ac was out-of-date with respect +to the latest autoconf. Running autoupdate made some valid changes, some valid +suggestions, and also some invalid changes, which were fixed by hand. Autoconf +now runs clean and the resulting "configure" seems to work, so I hope nothing +is broken. Later: the requirement for autoconf 2.70 broke some automatic test +robots. It doesn't seem to be necessary: trying a reduction to 2.60. + +6. The pattern /a\K.(?0)*/ when matched against "abac" by the interpreter gave +the answer "bac", whereas Perl and JIT both yield "c". This was because the +effect of \K was not propagating back from the full pattern recursion. 
Other +recursions such as /(a\K.(?1)*)/ did not have this problem. + +7. Restore single character repetition optimization in JIT. Currently fewer +character repetitions are optimized than in 10.34. + +8. When the names of the functions in the POSIX wrapper were changed to +pcre2_regcomp() etc. (see change 10.33 #4 below), functions with the original +names were left in the library so that pre-compiled programs would still work. +However, this has proved troublesome when programs link with several libraries, +some of which use PCRE2 via the POSIX interface while others use a native POSIX +library. For this reason, the POSIX function names are removed in this release. +The macros in pcre2posix.h should ensure that re-compiling fixes any programs +that haven't been compiled since before 10.33. + + +Version 10.36 04-December-2020 +------------------------------ + +1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to +compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for +Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt +invented by PH. + +2. Fix infinite loop when a single byte newline is searched in JIT when +invalid utf8 mode is enabled. + +3. Updated CMakeLists.txt with patch from Wolfgang Stöggl (Bugzilla #2584): + + - Include GNUInstallDirs and use ${CMAKE_INSTALL_LIBDIR} instead of hardcoded + lib. This allows differentiation between lib and lib64. + CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for + pkgconfig file generation. + + - Add the version of PCRE2 to the configuration summary like ./configure + does. + + - Fix typo: MACTHED_STRING->MATCHED_STRING + +4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla +#2588): + + - Add escaped double quotes around include directory in CMakeLists.txt to + allow spaces in directory names. + + - This fixes a cmake error, if the path of the pcre2 source contains a space. + +5. 
Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's +documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXIST. +Moreover, these functions come from specific header files, which need to be +specified (and, thankfully, are the same on both the Linux and WinXX +platforms.) + +6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c. + +7. Applied a patch from Wolfgang Stöggl (Bugzilla #2600) to fix postfix for +debug Windows builds using CMake. This also updated configure so that it +generates *.pc files and pcre2-config with the same content, as in the past. + +8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a +single digit, the code unit beyond d was being read (i.e. there was a read +buffer overflow). Fixes ClusterFuzz 23779. + +9. After the rework in r1235, certain character ranges were incorrectly +handled by an optimization in JIT. Furthermore a wrong offset was used to +read a value from a buffer which could lead to memory overread. + +10. Unnoticed for many years was the fact that delimiters other than / in the +testinput1 and testinput4 files could cause incorrect behaviour when these +files were processed by perltest.sh. There were several tests that used quotes +as delimiters, and it was just luck that they didn't go wrong with perltest.sh. +All the patterns in testinput1 and testinput4 now use / as their delimiter. +This fixes Bugzilla #2641. + +11. Perl has started to give an error for \K within lookarounds (though there +are cases where it doesn't). PCRE2 still allows this, so the tests that include +this case have been moved from test 1 to test 2. + +12. Further to 10 above, pcre2test has been updated to detect and grumble if a +delimiter other than / is used after #perltest. + +13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS +was set and PCRE2_NO_START_OPTIMIZE was not set. 
The optimization for finding +the start of a match was not resetting correctly after a failed match on the +first valid fragment of the subject, possibly causing incorrect "no match" +returns on subsequent fragments. For example, the pattern /A/ failed to match +the subject \xe5A. Fixes Bugzilla #2642. + +14. Fixed a bug in character set matching when JIT is enabled and both unicode +scripts and unicode classes are present at the same time. + +15. Added GNU grep's -m (aka --max-count) option to pcre2grep. + +16. Refactored substitution processing in pcre2grep strings, both for the -O +option and when dealing with callouts. There is now a single function that +handles $ expansion in all cases (instead of multiple copies of almost +identical code). This means that the same escape sequences are available +everywhere, which was not previously the case. At the same time, the escape +sequences $x{...} and $o{...} have been introduced, to allow for characters +whose code points are greater than 255 in Unicode mode. + +17. Applied the patch from Bugzilla #2628 to RunGrepTest. This does an explicit +test for a version of sed that can handle binary zero, instead of assuming that +any Linux version will work. Later: replaced $(...) by `...` because not all +shells recognize the former. + +18. Fixed a word boundary check bug in JIT when partial matching is enabled. + +19. Fix ARM64 compilation warning in JIT. Patch by Carlo. + +20. A bug in the RunTest script meant that if the first part of test 2 failed, +the failure was not reported. + +21. Test 2 was failing when run from a directory other than the source +directory. This failure was previously missed in RunTest because of 20 above. +Fixes added to both RunTest and RunTest.bat. + +22. Patch to CMakeLists.txt from Daniel to fix problem with testing under +Windows. + + +Version 10.35 09-May-2020 +--------------------------- + +1. Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT. + +2. 
Fix ARMv5 JIT improper handling of labels right after a constant pool. + +3. A JIT bug is fixed which allowed the fields of the compiled pattern to be +read before its existence is checked. + +4. Back in the PCRE1 day, capturing groups that contained recursive back +references to themselves were made atomic (version 8.01, change 18) because +after the end of a repeated group, the captured substrings had their values from +the final repetition, not from an earlier repetition that might be the +destination of a backtrack. This feature was documented, and was carried over +into PCRE2. However, it has now been realized that the major refactoring that +was done for 10.30 has made this atomizing unnecessary, and it is confusing +when users are unaware of it, making some patterns appear not to be working as +expected. Capture values of recursive back references in repeated groups are +now correctly backtracked, so this unnecessary restriction has been removed. + +5. Added PCRE2_SUBSTITUTE_LITERAL. + +6. Avoid some VS compiler warnings. + +7. Added PCRE2_SUBSTITUTE_MATCHED. + +8. Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another +regex engine. The Perl regex folks are aware of this usage and have made a note +about it. + +9. When an assertion is repeated, PCRE2 used to limit the maximum repetition to +1, believing that repeating an assertion is pointless. However, if a positive +assertion contains capturing groups, repetition can be useful. In any case, an +assertion could always be wrapped in a repeated group. The only restriction +that is now imposed is that an unlimited maximum is changed to one more than +the minimum. + +10. Fix *THEN verbs in lookahead assertions in JIT. + +11. Added PCRE2_SUBSTITUTE_REPLACEMENT_ONLY. + +12. The JIT stack should be freed when the low-level stack allocation fails. + +13. 
In pcre2grep, if the final line in a scanned file is output but does not +end with a newline sequence, add a newline according to the --newline setting. + +14. (?(DEFINE)...) groups were not being handled correctly when checking for +the fixed length of a lookbehind assertion. Such a group within a lookbehind +should be skipped, as it does not contribute to the length of the group. +Instead, the (DEFINE) group was being processed, and if at the end of the +lookbehind, that end was not correctly recognized. Errors such as "lookbehind +assertion is not fixed length" and also "internal error: bad code value in +parsed_skip()" could result. + +15. Put a limit of 1000 on recursive calls in pcre2_study() when searching +nested groups for starting code units, in order to avoid stack overflow issues. +If the limit is reached, it just gives up trying for this optimization. + +16. The control verb chain list must always be restored when exiting from a +recurse function in JIT. + +17. Fix a crash which occurs when the character type of an invalid UTF +character is decoded in JIT. + +18. Changes in many areas of the code so that when Unicode is supported and +PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for +upper/lower case computations on characters whose code points are greater than +127. + +19. The function for checking UTF-16 validity was returning an incorrect offset +for the start of the error when a high surrogate was not followed by a valid +low surrogate. This caused incorrect behaviour, for example when +PCRE2_MATCH_INVALID_UTF was set and a match started immediately following the +invalid high surrogate, such as /aa/ matching "\x{d800}aa". + +20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern +could be mis-compiled and therefore not match correctly. This is the example +that found this: /(?(DEFINE)(?bar))(? 
has been raised to +50, (b) the new --om-capture option changes the limit, (c) an error is raised +if -o asks for a group that is above the limit. + +12. The quantifier {1} was always being ignored, but this is incorrect when it +is made possessive and applied to an item in parentheses, because a +parenthesized item may contain multiple branches or other backtracking points, +for example /(a|ab){1}+c/ or /(a+){1}+a/. + +13. For partial matches, pcre2test was always showing the maximum lookbehind +characters, flagged with "<", which is misleading when the lookbehind didn't +actually look behind the start (because it was later in the pattern). Showing +all consulted preceding characters for partial matches is now controlled by the +existing "allusedtext" modifier and, as for complete matches, this facility is +available only for non-JIT matching, because JIT does not maintain the first +and last consulted characters. + +14. DFA matching (using pcre2_dfa_match()) was not recognising a partial match +if the end of the subject was encountered in a lookahead (conditional or +otherwise), an atomic group, or a recursion. + +15. Give error if pcre2test -t, -T, -tm or -TM is given an argument of zero. + +16. Check for integer overflow when computing lookbehind lengths. Fixes +Clusterfuzz issue 15636. + +17. Implemented non-atomic positive lookaround assertions. + +18. If a lookbehind contained a lookahead that contained another lookbehind +within it, the nested lookbehind was not correctly processed. For example, if +/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching +"b". + +19. Implemented pcre2_get_match_data_size(). + +20. Two alterations to partial matching: + + (a) The definition of a partial match is slightly changed: if a pattern + contains any lookbehinds, an empty partial match may be given, because this + is another situation where adding characters to the current subject can + lead to a full match. 
Example: /c*+(?<=[bc])/ with subject "ab". + + (b) Similarly, if a pattern could match an empty string, an empty partial + match may be given. Example: /(?![ab]).*/ with subject "ab". This case + applies only to PCRE2_PARTIAL_HARD. + + (c) An empty string partial hard match can be returned for \z and \Z as it + is documented that they shouldn't match. + +21. A branch that started with (*ACCEPT) was not being recognized as one that +could match an empty string. + +22. Corrected pcre2_set_character_tables() tables data type: was const unsigned +char * instead of const uint8_t *, as generated by pcre2_maketables(). + +23. Upgraded to Unicode 12.1.0. + +24. Add -jitfast command line option to pcre2test (to make all the jit options +available directly). + +25. Make pcre2test -C show if libreadline or libedit is supported. + +26. If the length of one branch of a group exceeded 65535 (the maximum value +that is remembered as a minimum length), the whole group's length was +incorrectly recorded as 65535, leading to incorrect "no match" when start-up +optimizations were in force. + +27. The "rightmost consulted character" value was not always correct; in +particular, if a pattern ended with a negative lookahead, characters that were +inspected in that lookahead were not included. + +28. Add the pcre2_maketables_free() function. + +29. The start-up optimization that looks for a unique initial matching +code unit in the interpretive engines uses memchr() in 8-bit mode. When the +search is caseless, it was doing so inefficiently, which ended up slowing down +the match drastically when the subject was very long. The revised code (a) +remembers if one case is not found, so it never repeats the search for that +case after a bumpalong and (b) when one case has been found, it searches only +up to that position for an earlier occurrence of the other case. This fix +applies to both interpretive pcre2_match() and to pcre2_dfa_match(). + +30. 
While scanning to find the minimum length of a group, if any branch has +minimum length zero, there is no need to scan any subsequent branches (a small +compile-time performance improvement). + +31. Installed a .gitignore file on a user's suggestion. When using the svn +repository with git (through git svn) this helps keep it tidy. + +32. Add underflow check in JIT which may occur when the value of subject +string pointer is close to 0. + +33. Arrange for classes such as [Aa] which contain just the two cases of the +same character, to be treated as a single caseless character. This causes the +first and required code unit optimizations to kick in where relevant. + +34. Improve the bitmap of starting bytes for positive classes that include wide +characters, but no property types, in UTF-8 mode. Previously, on encountering +such a class, the bits for all bytes greater than \xc4 were set, thus +specifying any character with codepoint >= 0x100. Now the only bits that are +set are for the relevant bytes that start the wide characters. This can give a +noticeable performance improvement. + +35. If the bitmap of starting code units contains only 1 or 2 bits, replace it +with a single starting code unit (1 bit) or a caseless single starting code +unit if the two relevant characters are case-partners. This is particularly +relevant to the 8-bit library, though it applies to all. It can give a +performance boost for patterns such as [Ww]ord and (word|WORD). However, this +optimization doesn't happen if there is a "required" code unit of the same +value (because the search for a "required" code unit starts at the match start +for non-unique first code unit patterns, but after a unique first code unit, +and patterns such as a*a need the former action). + +36. Small patch to pcre2posix.c to set the erroroffset field to -1 immediately +after a successful compile, instead of at the start of matching to avoid a +sanitizer complaint (regexec is supposed to be thread safe). + +37. 
Add NEON vectorization to JIT to speed up matching of first character and +pairs of characters on ARM64 CPUs. + +38. If a non-ASCII character was the first in a starting assertion in a +caseless match, the "first code unit" optimization did not get the casing +right, and the assertion failed to match a character in the other case if it +did not start with the same code unit. + +39. Fixed the incorrect computation of jump sizes on x86 CPUs in JIT. A masking +operation was incorrectly removed in r1136. Reported by Ralf Junker. + + +Version 10.33 16-April-2019 +--------------------------- + +1. Added "allvector" to pcre2test to make it easy to check the part of the +ovector that shouldn't be changed, in particular after substitute and failed or +partial matches. + +2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has +a greater than 1 fixed quantifier. This issue was found by Yunho Kim. + +3. Added support for callouts from pcre2_substitute(). After 10.33-RC1, but +prior to release, fixed a bug that caused a crash if pcre2_substitute() was +called with a NULL match context. + +4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper +functions that use the standard POSIX names. However, in pcre2posix.h the POSIX +names are defined as macros. This should help avoid linking with the wrong +library in some environments while still exporting the POSIX names for +pre-existing programs that use them. (The Debian alternative names are also +defined as macros, but not documented.) + +5. Fix an xclass matching issue in JIT. + +6. Implement PCRE2_EXTRA_ESCAPED_CR_IS_LF (see Bugzilla 2315). + +7. Implement the Perl 5.28 experimental alphabetic names for atomic groups and +lookaround assertions, for example, (*pla:...) and (*atomic:...). These are +characterized by a lower case letter following (* and to simplify coding for +this, the character tables created by pcre2_maketables() were updated to add a +new "is lower case letter" bit. 
At the same time, the now unused "is +hexadecimal digit" bit was removed. The default tables in +src/pcre2_chartables.c.dist are updated. + +8. Implement the new Perl "script run" features (*script_run:...) and +(*atomic_script_run:...) aka (*sr:...) and (*asr:...). + +9. Fixed two typos in change 22 for 10.21, which added special handling for +ranges such as a-z in EBCDIC environments. The original code probably never +worked, though there were no bug reports. + +10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via +pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast +path. Also, when a match fails, set the subject field in the match data to NULL +for tidiness - none of the substring extractors should reference this after +match failure. + +11. If a pattern started with a subroutine call that had a quantifier with a +minimum of zero, an incorrect "match must start with this character" could be +recorded. Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to +be the first character of a match. + +12. The heap limit checking code in pcre2_dfa_match() could suffer from +overflow if the heap limit was set very large. This could cause incorrect "heap +limit exceeded" errors. + +13. Add "kibibytes" to the heap limit output from pcre2test -C to make the +units clear. + +14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness. + +15. Updated the VMS-specific code in pcre2test on the advice of a VMS user. + +16. Removed the unnecessary inclusion of stdint.h (or inttypes.h) from +pcre2_internal.h as it is now included by pcre2.h. Also, change 17 for 10.32 +below was unnecessarily complicated, as inttypes.h is a Standard C header, +which is defined to be a superset of stdint.h. Instead of conditionally +including stdint.h or inttypes.h, pcre2.h now unconditionally includes +inttypes.h. This supports environments that do not have stdint.h but do have +inttypes.h, which are known to exist. 
A note in the autotools documentation +says (November 2018) that there are none known that are the other way round. + +17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to +forcibly disable the use of %zu and %td in formatting strings because there is +at least one version of VMS that claims to be C99 but does not support these +modifiers. + +18. Added --disable-pcre2grep-callout-fork, which restricts the callout support +in pcre2grep to the inbuilt echo facility. This may be useful in environments +that do not support fork(). + +19. Fix two instances of <= 0 being applied to unsigned integers (the VMS +compiler complains). + +20. Added "fork" support for VMS to pcre2grep, for running an external program +via a string callout. + +21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel. + +22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN) +followed by ^ it was not recognized as anchored. + +23. The RunGrepTest script used to cut out the test of NUL characters for +Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD +systems can't either. I've inverted the test so that only those OS that are +known to work (currently only Linux) try to run this test. + +24. Some tests in RunGrepTest appended to testtrygrep from two different file +descriptors instead of redirecting stderr to stdout. This worked on Linux, but +it was reported not to on other systems, causing the tests to fail. + +25. In the RunTest script, make the test for stack setting use the same value +for the stack as it needs for -bigstack. + +26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning. + +26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s +which are valid in character classes, but not as the end of ranges, were being +treated as literals. An example is [_-\s] (but not [\s-_] because that gave an +error at the *start* of a range). 
Now an "invalid range" error is given +independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +27. Related to 26 above, PCRE2_BAD_ESCAPE_IS_LITERAL was affecting known escape +sequences such as \eX when they appeared invalidly in a character class. Now +the option applies only to unrecognized or malformed escape sequences. + +28. Fix word boundary in JIT compiler. Patch by Mike Munday. + +29. The pcre2_dfa_match() function was incorrectly handling conditional version +tests such as (?(VERSION>=0)...) when the version test was true. Incorrect +processing or a crash could result. + +30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group +names, as Perl does. There was a small bug in this new code, found by +ClusterFuzz 12950, fixed before release. + +31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh} +construct. + +32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits +from auto-anchoring if \p{Any}* starts a pattern. + +33. Compile invalid UTF check in JIT test when only pcre32 is enabled. + +34. For some time now, CMake has been warning about the setting of policy +CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be +removed in a future version. A request for CMake expertise on the list produced +no result, so I have now hacked CMakeLists.txt along the lines of some changes +I found on the Internet. The new code no longer needs the policy setting, and +it appears to work fine on Linux. + +35. Setting --enable-jit=auto for an out-of-tree build failed because the +source directory wasn't in the search path for AC_TRY_COMPILE always. Patch +from Ross Burton. + +36. Disable SSE2 JIT optimizations in x86 CPUs when SSE2 is not available. +Patch by Guillem Jover. + +37. Changed expressions such as 1<<10 to 1u<<10 in many places because compiler +warnings were reported. + +38. 
Using the clang compiler with sanitizing options causes runtime complaints +about truncation for statements such as x = ~x when x is an 8-bit value; it +seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x +gets rid of the warnings. There were also two missing casts in pcre2test. + + +Version 10.32 10-September-2018 +------------------------------- + +1. When matching using the REG_STARTEND feature of the POSIX API with a +non-zero starting offset, unset capturing groups with lower numbers than a +group that did capture something were not being correctly returned as "unset" +(that is, with offset values of -1). + +2. When matching using the POSIX API, pcre2test used to omit listing unset +groups altogether. Now it shows those that come before any actual captures as +"<unset>", as happens for non-POSIX matching. + +3. Running "pcre2test -C" always stated "\R matches CR, LF, or CRLF only", +whatever the build configuration was. It now correctly says "\R matches all +Unicode newlines" in the default case when --enable-bsr-anycrlf has not been +specified. Similarly, running "pcre2test -C bsr" never produced the result +ANY. + +4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing +multi-code-unit characters caused bad behaviour and possibly a crash. This +issue was fixed for other kinds of repeat in release 10.20 by change 19, but +repeating character classes were overlooked. + +5. pcre2grep now supports the inclusion of binary zeros in patterns that are +read from files via the -f option. + +6. A small fix to pcre2grep to avoid compiler warnings for -Wformat-overflow=2. + +7. Added --enable-jit=auto support to configure.ac. + +8. Added some dummy variables to the heapframe structure in 16-bit and 32-bit +modes for the benefit of m68k, where pointers can be 16-bit aligned. The +dummies force 32-bit alignment and this ensures that the structure is a +multiple of PCRE2_SIZE, a requirement that is tested at compile time. 
In other +architectures, alignment requirements take care of this automatically. + +9. When returning an error from pcre2_pattern_convert(), ensure the error +offset is set zero for early errors. + +10. A number of patches for Windows support from Daniel Richard G: + + (a) List of error numbers in Runtest.bat corrected (it was not the same as in + Runtest). + + (b) pcre2grep snprintf() workaround as used elsewhere in the tree. + + (c) Support for non-C99 snprintf() that returns -1 in the overflow case. + +11. Minor tidy of pcre2_dfa_match() code. + +12. Refactored pcre2_dfa_match() so that the internal recursive calls no longer +use the stack for local workspace and local ovectors. Instead, an initial block +of stack is reserved, but if this is insufficient, heap memory is used. The +heap limit parameter now applies to pcre2_dfa_match(). + +13. If a "find limits" test of DFA matching in pcre2test resulted in too many +matches for the ovector, no matches were displayed. + +14. Removed an occurrence of ctrl/Z from test 6 because Windows treats it as +EOF. The test looks to have come from a fuzzer. + +15. If PCRE2 was built with a default match limit a lot greater than the +default default of 10 000 000, some JIT tests of the match limit no longer +failed. All such tests now set 10 000 000 as the upper limit. + +16. Another Windows related patch for pcre2grep to ensure that WIN32 is +undefined under Cygwin. + +17. Test for the presence of stdint.h and inttypes.h in configure and CMake and +include whichever exists (stdint preferred) instead of unconditionally +including stdint. This makes life easier for old and non-standard systems. + +18. Further changes to improve portability, especially to old and/or non- +standard systems: + + (a) Put all printf arguments in RunGrepTest into single, not double, quotes, + and use \0 not \x00 for binary zero. + + (b) Avoid the use of C++ (i.e. BCPL) // comments. + + (c) Parameterize the use of %zu in pcre2test to make it like %td. 
For both of + these now, if using MSVC or a standard C before C99, %lu is used with a + cast if necessary. + +19. Applied a contributed patch to CMakeLists.txt to increase the stack size +when linking pcre2test with MSVC. This gets rid of a stack overflow error in +the standard set of tests. + +20. Output a warning in pcre2test when ignoring the "altglobal" modifier when +it is given with the "replace" modifier. + +21. In both pcre2test and pcre2_substitute(), with global matching, a pattern +that matched an empty string, but never at the starting match offset, was not +handled in a Perl-compatible way. The pattern /(a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP +shouldn't find a MARK (because it is in an atomic group), but it did. + +26. Upgraded the perltest.sh script: (1) #pattern lines can now be used to set +a list of modifiers for all subsequent patterns - only those that the script +recognizes are meaningful; (2) #subject lines can be used to set or unset a +default "mark" modifier; (3) Unsupported #command lines give a warning when +they are ignored; (4) Mark data is output only if the "mark" modifier is +present. + +27. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported. + +28. A (*MARK) name was not being passed back for positive assertions that were +terminated by (*ACCEPT). + +29. Add support for \N{U+dddd}, but only in Unicode mode. + +30. Add support for (?^) for unsetting all imnsx options. + +31. The PCRE2_EXTENDED (/x) option only ever discarded space characters whose +code point was less than 256 and that were recognized by the lookup table +generated by pcre2_maketables(), which uses isspace() to identify white space. +Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085, +U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by +Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl. + +32. 
In certain circumstances, option settings within patterns were not being +correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly +matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the +end of its group during the parse process, but without another setting such as +(?m) the compile phase got it right.) This bug was introduced by the +refactoring in release 10.23. + +33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to +define memmove() as function call to bcopy(). This hasn't been tested for a +long time because in pcre2test the result of memmove() was being used, whereas +bcopy() doesn't return a result. This feature is now refactored always to call +an emulation function when there is no memmove(). The emulation makes use of +bcopy() when available. + +34. When serializing a pattern, set the memctl, executable_jit, and tables +fields (that is, all the fields that contain pointers) to zeros so that the +result of serializing is always the same. These fields are re-set when the +pattern is deserialized. + +35. In a pattern such as /[^\x{100}-\x{ffff}]*[\x80-\xff]/ which has a repeated +negative class with no characters less than 0x100 followed by a positive class +with only characters less than 0x100, the first class was incorrectly being +auto-possessified, causing incorrect match failures. + +36. Removed the character type bit ctype_meta, which dates from PCRE1 and is +not used in PCRE2. + +37. Tidied up unnecessarily complicated macros used in the escapes table. + +38. Since 10.21, the new testoutput8-16-4 file has accidentally been omitted +from distribution tarballs, owing to a typo in Makefile.am which had +testoutput8-16-3 twice. Now fixed. + +39. If the only branch in a conditional subpattern was anchored, the whole +subpattern was treated as anchored, when it should not have been, since the +assumed empty second branch cannot be anchored. 
Demonstrated by test patterns +such as /(?(1)^())b/ or /(?(?=^))b/. + +40. A repeated conditional subpattern that could match an empty string was +always assumed to be unanchored. Now it is checked just like any other +repeated conditional subpattern, and can be found to be anchored if the minimum +quantifier is one or more. I can't see much use for a repeated anchored +pattern, but the behaviour is now consistent. + +41. Minor addition to pcre2_jit_compile.c to avoid static analyzer complaint +(for an event that could never occur but you had to have external information +to know that). + +42. If before the first match in a file that was being searched by pcre2grep +there was a line that was sufficiently long to cause the input buffer to be +expanded, the variable holding the location of the end of the previous match +was being adjusted incorrectly, and could cause an overflow warning from a code +sanitizer. However, as the value is used only to print pending "after" lines +when the next match is reached (and there are no such lines in this case) this +bug could do no damage. + + +Version 10.31 12-February-2018 +------------------------------ + +1. Fix typo (missing ]) in VMS code in pcre2test.c. + +2. Replace the replicated code for matching extended Unicode grapheme sequences +(which got a lot more complicated by change 10.30/49) by a single subroutine +that is called by both pcre2_match() and pcre2_dfa_match(). + +3. Add idempotent guard to pcre2_internal.h. + +4. Add new pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and +PCRE2_CONFIG_COMPILED_WIDTHS. + +5. Cut out \C tests in the JIT regression tests when NEVER_BACKSLASH_C is +defined (e.g. by --enable-never-backslash-C). + +6. Defined public names for all the pcre2_compile() error numbers, and used +the public names in pcre2_convert.c. + +7. Fixed a small memory leak in pcre2test (convert contexts). + +8. Added two casts to compile.c and one to match.c to avoid compiler warnings. + +9. 
Added code to pcre2grep when compiled under VMS to set the symbol +PCRE2GREP_RC to the exit status, because VMS does not distinguish between +exit(0) and exit(1). + +10. Added the -LM (list modifiers) option to pcre2test. Also made -C complain +about a bad option only if the following argument item does not start with a +hyphen. + +11. pcre2grep was truncating components of file names to 128 characters when +processing files with the -r option, and also (some very odd code) truncating +path names to 512 characters. There is now a check on the absolute length of +full path file names, which may be up to 2047 characters long. + +12. When an assertion contained (*ACCEPT) it caused all open capturing groups +to be closed (as for a non-assertion ACCEPT), which was wrong and could lead to +misbehaviour for subsequent references to groups that started outside the +assertion. ACCEPT in an assertion now closes only those groups that were +started within that assertion. Fixes oss-fuzz issues 3852 and 3891. + +13. Multiline matching in pcre2grep was misbehaving if the pattern matched +within a line, and then matched again at the end of the line and over into +subsequent lines. Behaviour was different with and without colouring, and +sometimes context lines were incorrectly printed and/or line endings were lost. +All these issues should now be fixed. + +14. If --line-buffered was specified for pcre2grep when input was from a +compressed file (.gz or .bz2) a segfault occurred. (Line buffering should be +ignored for compressed files.) + +15. Although pcre2_jit_match checks whether the pattern is compiled +in a given mode, it was also expected that at least one mode is available. +This is fixed and pcre2_jit_match returns with PCRE2_ERROR_JIT_BADOPTION +when the pattern is not optimized by JIT at all. + +16. 
The line number and related variables such as match counts in pcre2grep +were all int variables, causing overflow when files with more than 2147483647 +lines were processed (assuming 32-bit ints). They have all been changed to +unsigned long ints. + +17. If a backreference with a minimum repeat count of zero was first in a +pattern, apart from assertions, an incorrect first matching character could be +recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set +as the first character of a match. + +18. Characters in a leading positive assertion are considered for recording a +first character of a match when the rest of the pattern does not provide one. +However, a character in a non-assertive group within a leading assertion such +as in the pattern /(?=(a))\1?b/ caused this process to fail. This was an +infelicity rather than an outright bug, because it did not affect the result of +a match, just its speed. (In fact, in this case, the starting 'a' was +subsequently picked up in the study.) + +19. A minor tidy in pcre2_match(): making all PCRE2_ERROR_ returns use "return" +instead of "RRETURN" saves unwinding the backtracks in these cases (only one +didn't). + +20. Allocate a single callout block on the stack at the start of pcre2_match() +and set its never-changing fields once only. Do the same for pcre2_dfa_match(). + +21. Save the extra compile options (set in the compile context) with the +compiled pattern (they were not previously saved), add PCRE2_INFO_EXTRAOPTIONS +to retrieve them, and update pcre2test to show them. + +22. Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new +field callout_flags in callout blocks. The bits are set by pcre2_match(), but +not by JIT or pcre2_dfa_match(). Their settings are shown in pcre2test callouts +if the callout_extra subject modifier is set. These bits are provided to help +with tracking how a backtracking match is proceeding. + +23. 
Updated the pcre2demo.c demonstration program, which was missing the extra +code for -g that handles the case when \K in an assertion causes the match to +end at the original start point. Also arranged for it to detect when \K causes +the end of a match to be before its start. + +24. Similar to 23 above, strange things (including loops) could happen in +pcre2grep when \K was used in an assertion when --colour was used or in +multiline mode. The "end at original start point" bug is fixed, and if the end +point is found to be before the start point, they are swapped. + +25. When PCRE2_FIRSTLINE without PCRE2_NO_START_OPTIMIZE was used in non-JIT +matching (both pcre2_match() and pcre2_dfa_match()) and the matched string +started with the first code unit of a newline sequence, matching failed because +it was not tried at the newline. + +26. Code for giving up a non-partial match after failing to find a starting +code unit anywhere in the subject was missing when searching for one of a +number of code units (the bitmap case) in both pcre2_match() and +pcre2_dfa_match(). This was a missing optimization rather than a bug. + +27. Tidied up the ACROSSCHAR macro to be like FORWARDCHAR and BACKCHAR, using a +pointer argument rather than a code unit value. This should not have affected +the generated code. + +28. The JIT compiler has been updated. + +29. Avoid pointer overflow for unset captures in pcre2_substring_list_get(). +This could not actually cause a crash because it was always used in a memcpy() +call with zero length. + +30. Some internal structures have a variable-length ovector[] as their last +element. Their actual memory is obtained dynamically, giving an ovector of +appropriate length. However, they are defined in the structure as +ovector[NUMBER], where NUMBER is large so that array bound checkers don't +grumble. The value of NUMBER was 10000, but a fuzzer exceeded 5000 capturing +groups, making the ovector larger than this. 
The number has been increased to +131072, which allows for the maximum number of captures (65535) plus the +overall match. This fixes oss-fuzz issue 5415. + +31. Auto-possessification at the end of a capturing group was dependent on what +follows the group (e.g. /(a+)b/ would auto-possessify the a+) but this caused +incorrect behaviour when the group was called recursively from elsewhere in the +pattern where something different might follow. This bug is an unforeseen +consequence of change #1 for 10.30 - the implementation of backtracking into +recursions. Iterators at the ends of capturing groups are no longer considered +for auto-possessification if the pattern contains any recursions. Fixes +Bugzilla #2232. + + +Version 10.30 14-August-2017 +---------------------------- + +1. The main interpreter, pcre2_match(), has been refactored into a new version +that does not use recursive function calls (and therefore the stack) for +remembering backtracking positions. This makes --disable-stack-for-recursion a +NOOP. The new implementation allows backtracking into recursive group calls in +patterns, making it more compatible with Perl, and also fixes some other +hard-to-do issues such as #1887 in Bugzilla. The code is also cleaner because +the old code had a number of fudges to try to reduce stack usage. It seems to +run no slower than the old code. + +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. These +bugs were never in fully released code, but are noted here for the record. + + (a) If a pattern had fewer capturing parentheses than the ovector supplied in + the match data block, a memory error (detectable by ASAN) occurred after + a match, because the external block was being set from non-existent + internal ovector fields. Fixes oss-fuzz issue 781. 
+ + (b) A pattern with very many capturing parentheses (when the internal frame + size was greater than the initial frame vector on the stack) caused a + crash. A vector on the heap is now set up at the start of matching if the + vector on the stack is not big enough to handle at least 10 frames. + Fixes oss-fuzz issue 783. + + (c) Handling of (*VERB)s in recursions was wrong in some cases. + + (d) Captures in negative assertions that were used as conditions were not + happening if the assertion matched via (*ACCEPT). + + (e) Mark values were not being passed out of recursions. + + (f) Refactor some code in do_callout() to avoid picky compiler warnings about + negative indices. Fixes oss-fuzz issue 1454. + + (g) Similarly refactor the way the variable length ovector is addressed for + similar reasons. Fixes oss-fuzz issue 1465. + +2. Now that pcre2_match() no longer uses recursive function calls (see above), +the "match limit recursion" value seems misnamed. It still exists, and limits +the depth of tree that is searched. To avoid future confusion, it has been +renamed as "depth limit" in all relevant places (--with-depth-limit, +(*LIMIT_DEPTH), pcre2_set_depth_limit(), etc) but the old names are still +available for backwards compatibility. + +3. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers: + + (a) Check for malloc failures when getting memory for the ovector (POSIX) or + the match data block (non-POSIX). + +4. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property +for a character with a code point greater than 0x10ffff (the Unicode maximum) +caused a crash. + +5. If a lookbehind assertion that contained a back reference to a group +appearing later in the pattern was compiled with the PCRE2_ANCHORED option, +undefined actions (often a segmentation fault) could occur, depending on what +other options were set. An example assertion is (?" should be ">=" in opcode check in pcre2_auto_possess.c. 
+ (b) Added some casts to avoid "suspicious implicit sign extension". + (c) Resource leaks in pcre2test in rare error cases. + (d) Avoid warning for never-use case OP_TABLE_LENGTH which is just a fudge + for checking at compile time that tables are the right size. + (e) Add missing "fall through" comment. + +29. Implemented PCRE2_EXTENDED_MORE and related /xx and (?xx) features. + +30. Implement (?n: for PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + +31. If more than one of "push", "pushcopy", or "pushtablescopy" were set in +pcre2test, a crash could occur. + +32. Make -bigstack in RunTest allocate a 64MiB stack (instead of 16MiB) so +that all the tests can run with clang's sanitizing options. + +33. Implement extra compile options in the compile context and add the first +one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. + +34. Implement newline type PCRE2_NEWLINE_NUL. + +35. A lookbehind assertion that had a zero-length branch caused undefined +behaviour when processed by pcre2_dfa_match(). This is oss-fuzz issue 1859. + +36. The match limit value now also applies to pcre2_dfa_match() as there are +patterns that can use up a lot of resources without necessarily recursing very +deeply. (Compare item 10.23/36.) This should fix oss-fuzz #1761. + +37. Implement PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +38. Fix returned offsets from regexec() when REG_STARTEND is used with a +starting offset greater than zero. + +39. Implement REG_PEND (GNU extension) for the POSIX wrapper. + +40. Implement the subject_literal modifier in pcre2test, and allow jitstack on +pattern lines. + +41. Implement PCRE2_LITERAL and use it to support REG_NOSPEC. + +42. Implement PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD for the benefit +of pcre2grep. + +43. Re-implement pcre2grep's -F, -w, and -x options using PCRE2_LITERAL, +PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This fixes two bugs: + + (a) The -F option did not work for fixed strings containing \E. 
+ (b) The -w option did not work for patterns with multiple branches. + +44. Added configuration options for the SELinux compatible execmem allocator in +JIT. + +45. Increased the limit for searching for a "must be present" code unit in +subjects from 1000 to 2000 for 8-bit searches, since they use memchr() and are +much faster. + +46. Arrange for anchored patterns to record and use "first code unit" data, +because this can give a fast "no match" without searching for a "required code +unit". Previously only non-anchored patterns did this. + +47. Upgraded the Unicode tables from Unicode 8.0.0 to Unicode 10.0.0. + +48. Add the callout_no_where modifier to pcre2test. + +49. Update extended grapheme breaking rules to the latest set that are in +Unicode Standard Annex #29. + +50. Added experimental foreign pattern conversion facilities +(pcre2_pattern_convert() and friends). + +51. Change the macro FWRITE, used in pcre2grep, to FWRITE_IGNORE because FWRITE +is defined in a system header in cygwin. Also modified some of the #ifdefs in +pcre2grep related to Windows and Cygwin support. + +52. Change 3(g) for 10.23 was a bit too zealous. If a hyphen that follows a +character class is the last character in the class, Perl does not give a +warning. PCRE2 now also treats this as a literal. + +53. Related to 52, though PCRE2 was throwing an error for [[:digit:]-X] it was +not doing so for [\d-X] (and similar escapes), as is documented. + +54. Fixed a MIPS issue in the JIT compiler reported by Joshua Kinard. + +55. Fixed a "maybe uninitialized" warning for class_uchardata in \p handling in +pcre2_compile() which could never actually trigger (code should have been cut +out when Unicode support is disabled). + + +Version 10.23 14-February-2017 +------------------------------ + +1. Extended pcre2test with the utf8_input modifier so that it is able to +generate all possible 16-bit and 32-bit code unit values in non-UTF modes. + +2. 
In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without +PCRE2_UCP set, a negative character type such as \D in a positive class should +cause all characters greater than 255 to match, whatever else is in the class. +There was a bug that caused this not to happen if a Unicode property item was +added to such a class, for example [\D\P{Nd}] or [\W\pL]. + +3. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. While doing this, +some minor bugs and Perl incompatibilities were fixed, including: + + (a) \Q\E in the middle of a quantifier such as A+\Q\E+ is now ignored instead + of giving an invalid quantifier error. + + (b) {0} can now be used after a group in a lookbehind assertion; previously + this caused an "assertion is not fixed length" error. + + (c) Perl always treats (?(DEFINE) as a "define" group, even if a group with + the name "DEFINE" exists. PCRE2 now does likewise. + + (d) A recursion condition test such as (?(R2)...) must now refer to an + existing subpattern. + + (e) A conditional recursion test such as (?(R)...) misbehaved if there was a + group whose name began with "R". + + (f) When testing zero-terminated patterns under valgrind, the terminating + zero is now marked "no access". This catches bugs that would otherwise + show up only with non-zero-terminated patterns. + + (g) A hyphen appearing immediately after a POSIX character class (for example + /[[:ascii:]-z]/) now generates an error. Perl does accept this as a + literal, but gives a warning, so it seems best to fail it in PCRE. + + (h) An empty \Q\E sequence may appear after a callout that precedes an + assertion condition (it is, of course, ignored). 
+ +One effect of the refactoring is that some error numbers and messages have +changed, and the pattern offset given for compiling errors is not always the +right-most character that has been read. In particular, for a variable-length +lookbehind assertion it now points to the start of the assertion. Another +change is that when a callout appears before a group, the "length of next +pattern item" that is passed now just gives the length of the opening +parenthesis item, not the length of the whole group. A length of zero is now +given only for a callout at the end of the pattern. Automatic callouts are no +longer inserted before and after explicit callouts in the pattern. + +A number of bugs in the refactored code were subsequently fixed during testing +before release, but after the code was made available in the repository. Many +of the bugs were discovered by fuzzing testing. Several of them were related to +the change from assuming a zero-terminated pattern (which previously had +required non-zero terminated strings to be copied). These bugs were never in +fully released code, but are noted here for the record. + + (a) An overall recursion such as (?0) inside a lookbehind assertion was not + being diagnosed as an error. + + (b) In utf mode, the length of a *MARK (or other verb) name was being checked + in characters instead of code units, which could lead to bad code being + compiled, leading to unpredictable behaviour. + + (c) In extended /x mode, characters whose code was greater than 255 caused + a lookup outside one of the global tables. A similar bug existed for wide + characters in *VERB names. + + (d) The amount of memory needed for a compiled pattern was miscalculated if a + lookbehind contained more than one toplevel branch and the first branch + was of length zero. 
+ + (e) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero- + terminated pattern, if a # comment ran on to the end of the pattern, one + or more code units past the end were being read. + + (f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g. + "{2,2") could cause reading beyond the pattern. + + (g) When reading a callout string, if the end delimiter was at the end of the + pattern one further code unit was read. + + (h) An unterminated number after \g' could cause reading beyond the pattern. + + (i) An insufficient memory size was being computed for compiling with + PCRE2_AUTO_CALLOUT. + + (j) A conditional group with an assertion condition used more memory than was + allowed for it during parsing, so too many of them could therefore + overrun a buffer. + + (k) If parsing a pattern exactly filled the buffer, the internal test for + overrun did not check when the final META_END item was added. + + (l) If a lookbehind contained a subroutine call, and the called group + contained an option setting such as (?s), and the PCRE2_ANCHORED option + was set, unpredictable behaviour could occur. The underlying bug was + incorrect code and insufficient checking while searching for the end of + the called subroutine in the parsed pattern. + + (m) Quantifiers following (*VERB)s were not being diagnosed as errors. + + (n) The use of \Q...\E in a (*VERB) name when PCRE2_ALT_VERBNAMES and + PCRE2_AUTO_CALLOUT were both specified caused undetermined behaviour. + + (o) If \Q was preceded by a quantified item, and the following \E was + followed by '?' or '+', and there was at least one literal character + between them, an internal error "unexpected repeat" occurred (example: + /.+\QX\E+/). + + (p) A buffer overflow could occur while sorting the names in the group name + list (depending on the order in which the names were seen). 
+ + (q) A conditional group that started with a callout was not doing the right + check for a following assertion, leading to compiling bad code. Example: + /(?(C'XX))?!XX/ + + (r) If a character whose code point was greater than 0xffff appeared within + a lookbehind that was within another lookbehind, the calculation of the + lookbehind length went wrong and could provoke an internal error. + + (t) The sequence \E- or \Q\E- after a POSIX class in a character class caused + an internal error. Now the hyphen is treated as a literal. + +4. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course be of fixed length. + +5. pcre2test has been upgraded so that, when run under valgrind with valgrind +support enabled, reading past the end of the pattern is detected, both when +compiling and during callout processing. + +6. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +7. Automatic callouts are no longer generated before and after callouts in the +pattern. + +8. When pcre2test was outputting information from a callout, the caret indicator +for the current position in the subject line was incorrect if it was after an +escape sequence for a character whose code point was greater than \x{ff}. + +9. Change 19 for 10.22 had a typo (PCRE_STATIC_RUNTIME should be +PCRE2_STATIC_RUNTIME). Fix from David Gaussmann. + +10. Added --max-buffer-size to pcre2grep, to allow for automatic buffer +expansion when long lines are encountered. Original patch by Dmitry +Cherniachenko. + +11. 
If pcre2grep was compiled with JIT support, but the library was compiled +without it (something that neither ./configure nor CMake allow, but it can be +done by editing config.h), pcre2grep was giving a JIT error. Now it detects +this situation and does not try to use JIT. + +12. Added some "const" qualifiers to variables in pcre2grep. + +13. Added Dmitry Cherniachenko's patch for colouring output in Windows +(untested by me). Also, look for GREP_COLOUR or GREP_COLOR if the environment +variables PCRE2GREP_COLOUR and PCRE2GREP_COLOR are not found. + +14. Add the -t (grand total) option to pcre2grep. + +15. A number of bugs have been mended relating to match start-up optimizations +when the first thing in a pattern is a positive lookahead. These all applied +only when PCRE2_NO_START_OPTIMIZE was *not* set: + + (a) A pattern such as (?=.*X)X$ was incorrectly optimized as if it needed + both an initial 'X' and a following 'X'. + (b) Some patterns starting with an assertion that started with .* were + incorrectly optimized as having to match at the start of the subject or + after a newline. There are cases where this is not true, for example, + (?=.*[A-Z])(?=.{8,16})(?!.*[\s]) matches after the start in lines that + start with spaces. Starting .* in an assertion is no longer taken as an + indication of matching at the start (or after a newline). + +16. The "offset" modifier in pcre2test was not being ignored (as documented) +when the POSIX API was in use. + +17. Added --enable-fuzz-support to "configure", causing a non-installed +library containing a test function that can be called by fuzzers to be +compiled. A non-installed binary to run the test function locally, called +pcre2fuzzcheck is also compiled. + +18. A pattern with PCRE2_DOTALL (/s) set but not PCRE2_NO_DOTSTAR_ANCHOR, and +which started with .* inside a positive lookahead was incorrectly being +compiled as implicitly anchored. + +19. 
Removed all instances of "register" declarations, as they are considered +obsolete these days and in any case had become very haphazard. + +20. Add strerror() to pcre2test for failed file opening. + +21. Make pcre2test -C list valgrind support when it is enabled. + +22. Add the use_length modifier to pcre2test. + +23. Fix an off-by-one bug in pcre2test for the list of names for 'get' and +'copy' modifiers. + +24. Add PCRE2_CALL_CONVENTION into the prototype declarations in pcre2.h as it +is apparently needed there as well as in the function definitions. (Why did +nobody ask for this in PCRE1?) + +25. Change the _PCRE2_H and _PCRE2_UCP_H guard macros in the header files to +PCRE2_H_IDEMPOTENT_GUARD and PCRE2_UCP_H_IDEMPOTENT_GUARD to be more standard +compliant and unique. + +26. pcre2-config --libs-posix was listing -lpcre2posix instead of +-lpcre2-posix. Also, the CMake build process was building the library with the +wrong name. + +27. In pcre2test, give some offset information for errors in hex patterns. +This uses the C99 formatting sequence %td, except for MSVC which doesn't +support it - %lu is used instead. + +28. Implemented pcre2_code_copy_with_tables(), and added pushtablescopy to +pcre2test for testing it. + +29. Fix small memory leak in pcre2test. + +30. Fix out-of-bounds read for partial matching of /./ against an empty string +when the newline type is CRLF. + +31. Fix a bug in pcre2test that caused a crash when a locale was set either in +the current pattern or a previous one and a wide character was matched. + +32. The appearance of \p, \P, or \X in a substitution string when +PCRE2_SUBSTITUTE_EXTENDED was set caused a segmentation fault (NULL +dereference). + +33. If the starting offset was specified as greater than the subject length in +a call to pcre2_substitute() an out-of-bounds memory reference could occur. + +34. 
When PCRE2 was compiled to use the heap instead of the stack for recursive +calls to match(), a repeated minimizing caseless back reference, or a +maximizing one where the two cases had different numbers of code units, +followed by a caseful back reference, could lose the caselessness of the first +repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX +but didn't). + +35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum +matching length and just records zero. Typically this happens when there are +too many nested or recursive back references. If the limit was reached in +certain recursive cases it failed to be triggered and an internal error could +be the result. + +36. The pcre2_dfa_match() function now takes note of the recursion limit for +the internal recursive calls that are used for lookrounds and recursions within +the pattern. + +37. More refactoring has got rid of the internal could_be_empty_branch() +function (around 400 lines of code, including comments) by keeping track of +could-be-emptiness as the pattern is compiled instead of scanning compiled +groups. (This would have been much harder before the refactoring of #3 above.) +This lifts a restriction on the number of branches in a group (more than about +1100 would give "pattern is too complicated"). + +38. Add the "-ac" command line option to pcre2test as a synonym for "-pattern +auto_callout". + +39. In a library with Unicode support, incorrect data was compiled for a +pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide +characters to match (for example, /[\s[:^ascii:]]/). + +40. The callout_error modifier has been added to pcre2test to make it possible +to return PCRE2_ERROR_CALLOUT from a callout. + +41. A minor change to pcre2grep: colour reset is now "<esc>[0m" instead of +"<esc>[00m". + +42. 
The limit in the auto-possessification code that was intended to catch +overly-complicated patterns and not spend too much time auto-possessifying was +being reset too often, resulting in very long compile times for some patterns. +Now such patterns are no longer completely auto-possessified. + +43. Applied Jason Hood's revised patch for RunTest.bat. + +44. Added a new Windows script RunGrepTest.bat, courtesy of Jason Hood. + +45. Minor cosmetic fix to pcre2test: move a variable that is not used under +Windows into the "not Windows" code. + +46. Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy +some of the code: + + * normalised the Windows condition by ensuring WIN32 is defined; + * enables the callout feature under Windows; + * adds globbing (Microsoft's implementation expands quoted args), + using a tweaked opendirectory; + * implements the is_*_tty functions for Windows; + * --color=always will write the ANSI sequences to file; + * add sequences 4 (underline works on Win10) and 5 (blink as bright + background, relatively standard on DOS/Win); + * remove the (char *) casts for the now-const strings; + * remove GREP_COLOUR (grep's command line allowed the 'u', but not + the environment), parsing GREP_COLORS instead; + * uses the current colour if not set, rather than black; + * add print_match for the undefined case; + * fixes a typo. + +In addition, colour settings containing anything other than digits and +semicolon are ignored, and the colour controls are no longer output for empty +strings. + +47. Detecting patterns that are too large inside the length-measuring loop +saves processing ridiculously long patterns to their end. + +48. Ignore PCRE2_CASELESS when processing \h, \H, \v, and \V in classes as it +just wastes time. In the UTF case it can also produce redundant entries in +XCLASS lists caused by characters with multiple other cases and pairs of +characters in the same "not-x" sublists. + +49. 
A pattern such as /(?=(a\K))/ can report the end of the match being before +its start; pcre2test was not handling this correctly when using the POSIX +interface (it was OK with the native interface). + +50. In pcre2grep, ignore all JIT compile errors. This means that pcre2grep will +continue to work, falling back to interpretation if anything goes wrong with +JIT. + +51. Applied patches from Christian Persch to configure.ac to make use of the +AC_USE_SYSTEM_EXTENSIONS macro and to test for functions used by the JIT +modules. + +52. Minor fixes to pcre2grep from Jason Hood: + * fixed some spacing; + * Windows doesn't usually use single quotes, so I've added a define + to use appropriate quotes [in an example]; + * LC_ALL was displayed as "LCC_ALL"; + * numbers 11, 12 & 13 should end in "th"; + * use double quotes in usage message. + +53. When autopossessifying, skip empty branches without recursion, to reduce +stack usage for the benefit of clang with -fsanitize-address, which uses huge +stack frames. Example pattern: /X?(R||){3335}/. Fixes oss-fuzz issue 553. + +54. A pattern with very many explicit back references to a group that is a long +way from the start of the pattern could take a long time to compile because +searching for the referenced group in order to find the minimum length was +being done repeatedly. Now up to 128 group minimum lengths are cached and the +attempt to find a minimum length is abandoned if there is a back reference to a +group whose number is greater than 128. (In that case, the pattern is so +complicated that this optimization probably isn't worth it.) This fixes +oss-fuzz issue 557. + +55. Issue 32 for 10.22 below was not correctly fixed. If pcre2grep in multiline +mode with --only-matching matched several lines, it restarted scanning at the +next line instead of moving on to the end of the matched string, which can be +several lines after the start. + +56. 
Applied Jason Hood's new patch for RunGrepTest.bat that updates it in line +with updates to the non-Windows version. + + + +Version 10.22 29-July-2016 +-------------------------- + +1. Applied Jason Hood's patches to RunTest.bat and testdata/wintestoutput3 +to fix problems with running the tests under Windows. + +2. Implemented a facility for quoting literal characters within hexadecimal +patterns in pcre2test, to make it easier to create patterns with just a few +non-printing characters. + +3. Binary zeros are not supported in pcre2test input files. It now detects them +and gives an error. + +4. Updated the valgrind parameters in RunTest: (a) changed smc-check=all to +smc-check=all-non-file; (b) changed obj:* in the suppression file to obj:??? so +that it matches only unknown objects. + +5. Updated the maintenance script maint/ManyConfigTests to make it easier to +select individual groups of tests. + +6. When the POSIX wrapper function regcomp() is called, the REG_NOSUB option +used to set PCRE2_NO_AUTO_CAPTURE when calling pcre2_compile(). However, this +disables the use of back references (and subroutine calls), which are supported +by other implementations of regcomp() with RE_NOSUB. Therefore, REG_NOSUB no +longer causes PCRE2_NO_AUTO_CAPTURE to be set, though it still ignores nmatch +and pmatch when regexec() is called. + +7. Because of 6 above, pcre2test has been modified with a new modifier called +posix_nosub, to call regcomp() with REG_NOSUB. Previously the no_auto_capture +modifier had this effect. That option is now ignored when the POSIX API is in +use. + +8. Minor tidies to the pcre2demo.c sample program, including more comments +about its 8-bit-ness. + +9. Detect unmatched closing parentheses and give the error in the pre-scan +instead of later. Previously the pre-scan carried on and could give a +misleading incorrect error message. For example, /(?J)(?'a'))(?'a')/ gave a +message about invalid duplicate group names. + +10. 
It has happened that pcre2test was accidentally linked with another POSIX +regex library instead of libpcre2-posix. In this situation, a call to regcomp() +(in the other library) may succeed, returning zero, but of course putting its +own data into the regex_t block. In one example the re_pcre2_code field was +left as NULL, which made pcre2test think it had not got a compiled POSIX regex, +so it treated the next line as another pattern line, resulting in a confusing +error message. A check has been added to pcre2test to see if the data returned +from a successful call of regcomp() are valid for PCRE2's regcomp(). If they +are not, an error message is output and the pcre2test run is abandoned. The +message points out the possibility of a mis-linking. Hopefully this will avoid +some head-scratching the next time this happens. + +11. A pattern such as /(?<=((?C)0))/, which has a callout inside a lookbehind +assertion, caused pcre2test to output a very large number of spaces when the +callout was taken, making the program appear to loop. + +12. A pattern that included (*ACCEPT) in the middle of a sufficiently deeply +nested set of parentheses of sufficient size caused an overflow of the +compiling workspace (which was diagnosed, but of course is not desirable). + +13. Detect missing closing parentheses during the pre-pass for group +identification. + +14. Changed some integer variable types and put in a number of casts, following +a report of compiler warnings from Visual Studio 2013 and a few tests with +gcc's -Wconversion (which still throws up a lot). + +15. Implemented pcre2_code_copy(), and added pushcopy and #popcopy to pcre2test +for testing it. + +16. Change 66 for 10.21 introduced the use of snprintf() in PCRE2's version of +regerror(). When the error buffer is too small, my version of snprintf() puts a +binary zero in the final byte. 
Bug #1801 seems to show that other versions do +not do this, leading to bad output from pcre2test when it was checking for +buffer overflow. It no longer assumes a binary zero at the end of a too-small +regerror() buffer. + +17. Fixed typo ("&&" for "&") in pcre2_study(). Fortunately, this could not +actually affect anything, by sheer luck. + +18. Two minor fixes for MSVC compilation: (a) removal of apparently incorrect +"const" qualifiers in pcre2test and (b) defining snprintf as _snprintf for +older MSVC compilers. This has been done both in src/pcre2_internal.h for most +of the library, and also in src/pcre2posix.c, which no longer includes +pcre2_internal.h (see 24 below). + +19. Applied Chris Wilson's patch (Bugzilla #1681) to CMakeLists.txt for MSVC +static compilation. Subsequently applied Chris Wilson's second patch, putting +the first patch under a new option instead of being unconditional when +PCRE_STATIC is set. + +20. Updated pcre2grep to set stdout as binary when run under Windows, so as not +to convert \r\n at the ends of reflected lines into \r\r\n. This required +ensuring that other output that is written to stdout (e.g. file names) uses the +appropriate line terminator: \r\n for Windows, \n otherwise. + +21. When a line is too long for pcre2grep's internal buffer, show the maximum +length in the error message. + +22. Added support for string callouts to pcre2grep (Zoltan's patch with PH +additions). + +23. RunTest.bat was missing a "set type" line for test 22. + +24. The pcre2posix.c file was including pcre2_internal.h, and using some +"private" knowledge of the data structures. This is unnecessary; the code has +been re-factored and no longer includes pcre2_internal.h. + +25. A racing condition is fixed in JIT reported by Mozilla. + +26. Minor code refactor to avoid "array subscript is below array bounds" +compiler warning. + +27. Minor code refactor to avoid "left shift of negative number" warning. + +28. 
Add a bit more sanity checking to pcre2_serialize_decode() and document +that it expects trusted data. + +29. Fix typo in pcre2_jit_test.c + +30. Due to an oversight, pcre2grep was not making use of JIT when available. +This is now fixed. + +31. The RunGrepTest script is updated to use the valgrind suppressions file +when testing with JIT under valgrind (compare 10.21/51 below). The suppressions +file is updated so that it is now the same as for PCRE1: it suppresses the +Memcheck warnings Addr16 and Cond in unknown objects (that is, JIT-compiled +code). Also changed smc-check=all to smc-check=all-non-file as was done for +RunTest (see 4 above). + +32. Implemented the PCRE2_NO_JIT option for pcre2_match(). + +33. Fix typo that gave a compiler error when JIT not supported. + +34. Fix comment describing the returns from find_fixedlength(). + +35. Fix potential negative index in pcre2test. + +36. Calls to pcre2_get_error_message() with error numbers that are never +returned by PCRE2 functions were returning empty strings. Now the error code +PCRE2_ERROR_BADDATA is returned. A facility has been added to pcre2test to +show the texts for given error numbers (i.e. to call pcre2_get_error_message() +and display what it returns) and a few representative error codes are now +checked in RunTest. + +37. Added "&& !defined(__INTEL_COMPILER)" to the test for __GNUC__ in +pcre2_match.c, in anticipation that this is needed for the same reason it was +recently added to pcrecpp.cc in PCRE1. + +38. Using -o with -M in pcre2grep could cause unnecessary repeated output when +the match extended over a line boundary, as it tried to find more matches "on +the same line" - but it was already over the end. + +39. Allow \C in lookbehinds and DFA matching in UTF-32 mode (by converting it +to the same code as '.' when PCRE2_DOTALL is set). + +40. Fix two clang compiler warnings in pcre2test when only one code unit width +is supported. + +41. 
Upgrade RunTest to automatically re-run test 2 with a large (64MiB) stack +if it fails when running the interpreter with a 16MiB stack (and if changing +the stack size via pcre2test is possible). This avoids having to manually set a +large stack size when testing with clang. + +42. Fix register overwrite in JIT when SSE2 acceleration is enabled. + +43. Detect integer overflow in pcre2test pattern and data repetition counts. + +44. In pcre2test, ignore "allcaptures" after DFA matching. + +45. Fix unaligned accesses on x86. Patch by Marc Mutz. + +46. Fix some more clang compiler warnings. + + +Version 10.21 12-January-2016 +----------------------------- + +1. Improve matching speed of patterns starting with + or * in JIT. + +2. Use memchr() to find the first character in an unanchored match in 8-bit +mode in the interpreter. This gives a significant speed improvement. + +3. Removed a redundant copy of the opcode_possessify table in the +pcre2_auto_possessify.c source. + +4. Fix typos in dftables.c for z/OS. + +5. Change 36 for 10.20 broke the handling of [[:>:]] and [[:<:]] in that +processing them could involve a buffer overflow if the following character was +an opening parenthesis. + +6. Change 36 for 10.20 also introduced a bug in processing this pattern: +/((?x)(*:0))#(?'/. Specifically: if a setting of (?x) was followed by a (*MARK) +setting (which (*:0) is), then (?x) did not get unset at the end of its group +during the scan for named groups, and hence the external # was incorrectly +treated as a comment and the invalid (?' at the end of the pattern was not +diagnosed. This caused a buffer overflow during the real compile. This bug was +discovered by Karl Skomski with the LLVM fuzzer. + +7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its +own source module to avoid a circular dependency between src/pcre2_compile.c +and src/pcre2_study.c + +8. 
A callout with a string argument containing an opening square bracket, for +example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer +overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +9. The handling of callouts during the pre-pass for named group identification +has been tightened up. + +10. The quantifier {1} can be ignored, whether greedy, non-greedy, or +possessive. This is a very minor optimization. + +11. A possessively repeated conditional group that could match an empty string, +for example, /(?(R))*+/, was incorrectly compiled. + +12. The Unicode tables have been updated to Unicode 8.0.0 (thanks to Christian +Persch). + +13. An empty comment (?#) in a pattern was incorrectly processed and could +provoke a buffer overflow. This bug was discovered by Karl Skomski with the +LLVM fuzzer. + +14. Fix infinite recursion in the JIT compiler when certain patterns such as +/(?:|a|){100}x/ are analysed. + +15. Some patterns with character classes involving [: and \\ were incorrectly +compiled and could cause reading from uninitialized memory or an incorrect +error diagnosis. Examples are: /[[:\\](?<[::]/ and /[[:\\](?'abc')[a:]. The +first of these bugs was discovered by Karl Skomski with the LLVM fuzzer. + +16. Pathological patterns containing many nested occurrences of [: caused +pcre2_compile() to run for a very long time. This bug was found by the LLVM +fuzzer. + +17. A missing closing parenthesis for a callout with a string argument was not +being diagnosed, possibly leading to a buffer overflow. This bug was found by +the LLVM fuzzer. + +18. A conditional group with only one branch has an implicit empty alternative +branch and must therefore be treated as potentially matching an empty string. + +19. If (?R was followed by - or + incorrect behaviour happened instead of a +diagnostic. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +20. 
Another bug that was introduced by change 36 for 10.20: conditional groups +whose condition was an assertion preceded by an explicit callout with a string +argument might be incorrectly processed, especially if the string contained \Q. +This bug was discovered by Karl Skomski with the LLVM fuzzer. + +21. Compiling PCRE2 with the sanitize options of clang showed up a number of +very pedantic coding infelicities and a buffer overflow while checking a UTF-8 +string if the final multi-byte UTF-8 character was truncated. + +22. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +23. Finding the minimum matching length of complex patterns with back +references and/or recursions can take a long time. There is now a cut-off that +gives up trying to find a minimum length when things get too complex. + +24. An optimization has been added that speeds up finding the minimum matching +length for patterns containing repeated capturing groups or recursions. + +25. If a pattern contained a back reference to a group whose number was +duplicated as a result of appearing in a (?|...) group, the computation of the +minimum matching length gave a wrong result, which could cause incorrect "no +match" errors. For such patterns, a minimum matching length cannot at present +be computed. + +26. Added a check for integer overflow in conditions (?() and +(?(R). This omission was discovered by Karl Skomski with the LLVM +fuzzer. + +27. Fixed an issue when \p{Any} inside an xclass did not read the current +character. + +28. If pcre2grep was given the -q option with -c or -l, or when handling a +binary file, it incorrectly wrote output to stdout. + +29. The JIT compiler did not restore the control verb head in case of *THEN +control verbs. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +30. 
The way recursive references such as (?3) are compiled has been re-written +because the old way was the cause of many issues. Now, conversion of the group +number into a pattern offset does not happen until the pattern has been +completely compiled. This does mean that detection of all infinitely looping +recursions is postponed till match time. In the past, some easy ones were +detected at compile time. This re-writing was done in response to yet another +bug found by the LLVM fuzzer. + +31. A test for a back reference to a non-existent group was missing for items +such as \987. This caused incorrect code to be compiled. This issue was found +by Karl Skomski with a custom LLVM fuzzer. + +32. Error messages for syntax errors following \g and \k were giving inaccurate +offsets in the pattern. + +33. Improve the performance of starting single character repetitions in JIT. + +34. (*LIMIT_MATCH=) now gives an error instead of setting the value to 0. + +35. Error messages for syntax errors in *LIMIT_MATCH and *LIMIT_RECURSION now +give the right offset instead of zero. + +36. The JIT compiler should not check repeats after a {0,1} repeat byte code. +This issue was found by Karl Skomski with a custom LLVM fuzzer. + +37. The JIT compiler should restore the control chain for empty possessive +repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer. + +38. A bug which was introduced by the single character repetition optimization +was fixed. + +39. Match limit check added to recursion. This issue was found by Karl Skomski +with a custom LLVM fuzzer. + +40. Arrange for the UTF check in pcre2_match() and pcre2_dfa_match() to look +only at the part of the subject that is relevant when the starting offset is +non-zero. + +41. Improve first character match in JIT with SSE2 on x86. + +42. Fix two assertion fails in JIT. These issues were found by Karl Skomski +with a custom LLVM fuzzer. + +43. 
Correct the setting of CMAKE_C_FLAGS in CMakeLists.txt (patch from Roy Ivy +III). + +44. Fix bug in RunTest.bat for new test 14, and adjust the script for the added +test (there are now 20 in total). + +45. Fixed a corner case of range optimization in JIT. + +46. Add the ${*MARK} facility to pcre2_substitute(). + +47. Modifier lists in pcre2test were splitting at spaces without the required +commas. + +48. Implemented PCRE2_ALT_VERBNAMES. + +49. Fixed two issues in JIT. These were found by Karl Skomski with a custom +LLVM fuzzer. + +50. The pcre2test program has been extended by adding the #newline_default +command. This has made it possible to run the standard tests when PCRE2 is +compiled with either CR or CRLF as the default newline convention. As part of +this work, the new command was added to several test files and the testing +scripts were modified. The pcre2grep tests can now also be run when there is no +LF in the default newline convention. + +51. The RunTest script has been modified so that, when JIT is used and valgrind +is specified, a valgrind suppressions file is set up to ignore "Invalid read of +size 16" errors because these are false positives when the hardware supports +the SSE2 instruction set. + +52. It is now possible to have comment lines amid the subject strings in +pcre2test (and perltest.sh) input. + +53. Implemented PCRE2_USE_OFFSET_LIMIT and pcre2_set_offset_limit(). + +54. Add the null_context modifier to pcre2test so that calling pcre2_compile() +and the matching functions with NULL contexts can be tested. + +55. Implemented PCRE2_SUBSTITUTE_EXTENDED. + +56. In a character class such as [\W\p{Any}] where both a negative-type escape +("not a word character") and a property escape were present, the property +escape was being ignored. + +57. Fixed integer overflow for patterns whose minimum matching length is very, +very large. + +58. Implemented --never-backslash-C. + +59. 
Change 55 above introduced a bug by which certain patterns provoked the +erroneous error "\ at end of pattern". + +60. The special sequences [[:<:]] and [[:>:]] gave rise to incorrect compiling +errors or other strange effects if compiled in UCP mode. Found with libFuzzer +and AddressSanitizer. + +61. Whitespace at the end of a pcre2test pattern line caused a spurious error +message if there were only single-character modifiers. It should be ignored. + +62. The use of PCRE2_NO_AUTO_CAPTURE could cause incorrect compilation results +or segmentation errors for some patterns. Found with libFuzzer and +AddressSanitizer. + +63. Very long names in (*MARK) or (*THEN) etc. items could provoke a buffer +overflow. + +64. Improve error message for overly-complicated patterns. + +65. Implemented an optional replication feature for patterns in pcre2test, to +make it easier to test long repetitive patterns. The tests for 63 above are +converted to use the new feature. + +66. In the POSIX wrapper, if regerror() was given too small a buffer, it could +misbehave. + +67. In pcre2_substitute() in UTF mode, the UTF validity check on the +replacement string was happening before the length setting when the replacement +string was zero-terminated. + +68. In pcre2_substitute() in UTF mode, PCRE2_NO_UTF_CHECK can be set for the +second and subsequent calls to pcre2_match(). + +69. There was no check for integer overflow for a replacement group number in +pcre2_substitute(). An added check for a number greater than the largest group +number in the pattern means this is not now needed. + +70. The PCRE2-specific VERSION condition didn't work correctly if only one +digit was given after the decimal point, or if more than two digits were given. +It now works with one or two digits, and gives a compile time error if more are +given. + +71. In pcre2_substitute() there was the possibility of reading one code unit +beyond the end of the replacement string. + +72. 
The code for checking a subject's UTF-32 validity for a pattern with a +lookbehind involved an out-of-bounds pointer, which could potentially cause +trouble in some environments. + +73. The maximum lookbehind length was incorrectly calculated for patterns such +as /(?<=(a)(?-1))x/ which have a recursion within a backreference. + +74. Give an error if a lookbehind assertion is longer than 65535 code units. + +75. Give an error in pcre2_substitute() if a match ends before it starts (as a +result of the use of \K). + +76. Check the length of subpattern names and the names in (*MARK:xx) etc. +dynamically to avoid the possibility of integer overflow. + +77. Implement pcre2_set_max_pattern_length() so that programs can restrict the +size of patterns that they are prepared to handle. + +78. (*NO_AUTO_POSSESS) was not working. + +79. Adding group information caching improves the speed of compiling when +checking whether a group has a fixed length and/or could match an empty string, +especially when recursion or subroutine calls are involved. However, this +cannot be used when (?| is present in the pattern because the same number may +be used for groups of different sizes. To catch runaway patterns in this +situation, counts have been introduced to the functions that scan for empty +branches or compute fixed lengths. + +80. Allow for the possibility of the size of the nest_save structure not being +a factor of the size of the compiling workspace (it currently is). + +81. Check for integer overflow in minimum length calculation and cap it at +65535. + +82. Small optimizations in code for finding the minimum matching length. + +83. Lock out configuring for EBCDIC with non-8-bit libraries. + +84. Test for error code <= 0 in regerror(). + +85. Check for too many replacements (more than INT_MAX) in pcre2_substitute(). + +86. Avoid the possibility of computing with an out-of-bounds pointer (though +not dereferencing it) while handling lookbehind assertions. + +87. 
Failure to get memory for the match data in regcomp() is now given as a +regcomp() error instead of waiting for regexec() to pick it up. + +88. In pcre2_substitute(), ensure that CRLF is not split when it is a valid +newline sequence. + +89. Paranoid check in regcomp() for bad error code from pcre2_compile(). + +90. Run test 8 (internal offsets and code sizes) for link sizes 3 and 4 as well +as for link size 2. + +91. Document that JIT has a limit on pattern size, and give more information +about JIT compile failures in pcre2test. + +92. Implement PCRE2_INFO_HASBACKSLASHC. + +93. Re-arrange valgrind support code in pcre2test to avoid spurious reports +with JIT (possibly caused by SSE2?). + +94. Support offset_limit in JIT. + +95. A sequence such as [[:punct:]b] that is, a POSIX character class followed +by a single ASCII character in a class item, was incorrectly compiled in UCP +mode. The POSIX class got lost, but only if the single character followed it. + +96. [:punct:] in UCP mode was matching some characters in the range 128-255 +that should not have been matched. + +97. If [:^ascii:] or [:^xdigit:] are present in a non-negated class, all +characters with code points greater than 255 are in the class. When a Unicode +property was also in the class (if PCRE2_UCP is set, escapes such as \w are +turned into Unicode properties), wide characters were not correctly handled, +and could fail to match. + +98. In pcre2test, make the "startoffset" modifier a synonym of "offset", +because it sets the "startoffset" parameter for pcre2_match(). + +99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between +an item and its qualifier (for example, A(?#comment)?B) pcre2_compile() +misbehaved. This bug was found by the LLVM fuzzer. + +100. The error for an invalid UTF pattern string always gave the code unit +offset as zero instead of where the invalidity was found. + +101. 
Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not +working correctly in UCP mode. + +102. Similar to 99 above, if an isolated \E was present between an item and its +qualifier when PCRE2_AUTO_CALLOUT was set, pcre2_compile() misbehaved. This bug +was found by the LLVM fuzzer. + +103. The POSIX wrapper function regexec() crashed if the option REG_STARTEND +was set when the pmatch argument was NULL. It now returns REG_INVARG. + +104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep. + +105. An empty \Q\E sequence between an item and its qualifier caused +pcre2_compile() to misbehave when auto callouts were enabled. This bug +was found by the LLVM fuzzer. + +106. If both PCRE2_ALT_VERBNAMES and PCRE2_EXTENDED were set, and a (*MARK) or +other verb "name" ended with whitespace immediately before the closing +parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when +both those options were set. + +107. In a number of places pcre2_compile() was not handling NULL characters +correctly, and pcre2test with the "bincode" modifier was not always correctly +displaying fields containing NULLS: + + (a) Within /x extended #-comments + (b) Within the "name" part of (*MARK) and other *verbs + (c) Within the text argument of a callout + +108. If a pattern that was compiled with PCRE2_EXTENDED started with white +space or a #-type comment that was followed by (?-x), which turns off +PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again, +pcre2_compile() assumed that (?-x) applied to the whole pattern and +consequently mis-compiled it. This bug was found by the LLVM fuzzer. The fix +for this bug means that a setting of any of the (?imsxJU) options at the start +of a pattern is no longer transferred to the options that are returned by +PCRE2_INFO_ALLOPTIONS. In fact, this was an anachronism that should have +changed when the effects of those options were all moved to compile time. + +109. 
An escaped closing parenthesis in the "name" part of a (*verb) when +PCRE2_ALT_VERBNAMES was set caused pcre2_compile() to malfunction. This bug +was found by the LLVM fuzzer. + +110. Implemented PCRE2_SUBSTITUTE_UNSET_EMPTY, and updated pcre2test to make it +possible to test it. + +111. "Harden" pcre2test against ridiculously large values in modifiers and +command line arguments. + +112. Implemented PCRE2_SUBSTITUTE_UNKNOWN_UNSET and PCRE2_SUBSTITUTE_OVERFLOW_ +LENGTH. + +113. Fix printing of *MARK names that contain binary zeroes in pcre2test. + + +Version 10.20 30-June-2015 +-------------------------- + +1. Callouts with string arguments have been added. + +2. Assertion code generator in JIT has been optimized. + +3. The invalid pattern (?(?C) has a missing assertion condition at the end. The +pcre2_compile() function read past the end of the input before diagnosing an +error. This bug was discovered by the LLVM fuzzer. + +4. Implemented pcre2_callout_enumerate(). + +5. Fix JIT compilation of conditional blocks whose assertion is converted to +(*FAIL). E.g: /(?(?!))/. + +6. The pattern /(?(?!)^)/ caused references to random memory. This bug was +discovered by the LLVM fuzzer. + +7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly +when this assertion was used as a condition, for example (?(?!)a|b). In +pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect +error about an unsupported item. + +8. For some types of pattern, for example /Z*(|d*){216}/, the auto- +possessification code could take exponential time to complete. A recursion +depth limit of 1000 has been imposed to limit the resources used by this +optimization. This infelicity was discovered by the LLVM fuzzer. + +9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class +such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored +because \S ensures they are all in the class. 
The code for doing this was +interacting badly with the code for computing the amount of space needed to +compile the pattern, leading to a buffer overflow. This bug was discovered by +the LLVM fuzzer. + +10. A pattern such as /((?2)+)((?1))/ which has mutual recursion nested inside +other kinds of group caused stack overflow at compile time. This bug was +discovered by the LLVM fuzzer. + +11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment +between a subroutine call and its quantifier was incorrectly compiled, leading +to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer. + +12. The illegal pattern /(?(?.*!.*)?)/ was not being diagnosed as missing an +assertion after (?(. The code was failing to check the character after (?(?< +for the ! or = that would indicate a lookbehind assertion. This bug was +discovered by the LLVM fuzzer. + +13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with +a fixed maximum following a group that contains a subroutine reference was +incorrectly compiled and could trigger buffer overflow. This bug was discovered +by the LLVM fuzzer. + +14. Negative relative recursive references such as (?-7) to non-existent +subpatterns were not being diagnosed and could lead to unpredictable behaviour. +This bug was discovered by the LLVM fuzzer. + +15. The bug fixed in 14 was due to an integer variable that was unsigned when +it should have been signed. Some other "int" variables, having been checked, +have either been changed to uint32_t or commented as "must be signed". + +16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1))) +caused a stack overflow instead of the diagnosis of a non-fixed length +lookbehind assertion. This bug was discovered by the LLVM fuzzer. + +17. The use of \K in a positive lookbehind assertion in a non-anchored pattern +(e.g. /(?<=\Ka)/) could make pcre2grep loop. + +18. 
There was a similar problem to 17 in pcre2test for global matches, though +the code there did catch the loop. + +19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*), +and a subsequent item in the pattern caused a non-match, backtracking over the +repeated \X did not stop, but carried on past the start of the subject, causing +reference to random memory and/or a segfault. There were also some other cases +where backtracking after \C could crash. This set of bugs was discovered by the +LLVM fuzzer. + +20. The function for finding the minimum length of a matching string could take +a very long time if mutual recursion was present many times in a pattern, for +example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has +been implemented. This infelicity was discovered by the LLVM fuzzer. + +21. Implemented PCRE2_NEVER_BACKSLASH_C. + +22. The feature for string replication in pcre2test could read from freed +memory if the replication required a buffer to be extended, and it was not +working properly in 16-bit and 32-bit modes. This issue was discovered by a +fuzzer: see http://lcamtuf.coredump.cx/afl/. + +23. Added the PCRE2_ALT_CIRCUMFLEX option. + +24. Adjust the treatment of \8 and \9 to be the same as the current Perl +behaviour. + +25. Static linking against the PCRE2 library using the pkg-config module was +failing on missing pthread symbols. + +26. If a group that contained a recursive back reference also contained a +forward reference subroutine call followed by a non-forward-reference +subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to +compile correct code, leading to undefined behaviour or an internally detected +error. This bug was discovered by the LLVM fuzzer. + +27. Quantification of certain items (e.g. atomic back references) could cause +incorrect code to be compiled when recursive forward references were involved. +For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. 
This bug was +discovered by the LLVM fuzzer. + +28. A repeated conditional group whose condition was a reference by name caused +a buffer overflow if there was more than one group with the given name. This +bug was discovered by the LLVM fuzzer. + +29. A recursive back reference by name within a group that had the same name as +another group caused a buffer overflow. For example: /(?J)(?'d'(?'d'\g{d}))/. +This bug was discovered by the LLVM fuzzer. + +30. A forward reference by name to a group whose number is the same as the +current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused a +buffer overflow at compile time. This bug was discovered by the LLVM fuzzer. + +31. Fix -fsanitize=undefined warnings for left shifts of 1 by 31 (it treats 1 +as an int; fixed by writing it as 1u). + +32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives +a warning for "fileno" unless -std=gnu99 is used. + +33. A lookbehind assertion within a set of mutually recursive subpatterns could +provoke a buffer overflow. This bug was discovered by the LLVM fuzzer. + +34. Give an error for an empty subpattern name such as (?''). + +35. Make pcre2test give an error if a pattern that follows #forbid_utf contains +\P, \p, or \X. + +36. The way named subpatterns are handled has been refactored. There is now a +pre-pass over the regex which does nothing other than identify named +subpatterns and count the total captures. This means that information about +named patterns is known before the rest of the compile. In particular, it means +that forward references can be checked as they are encountered. Previously, the +code for handling forward references was contorted and led to several errors in +computing the memory requirements for some patterns, leading to buffer +overflows. + +37. There was no check for integer overflow in subroutine calls such as (?123). + +38. 
The table entry for \l in EBCDIC environments was incorrect, leading to its +being treated as a literal 'l' instead of causing an error. + +39. If a non-capturing group containing a conditional group that could match +an empty string was repeated, it was not identified as matching an empty string +itself. For example: /^(?:(?(1)x|)+)+$()/. + +40. In an EBCDIC environment, pcretest was mishandling the escape sequences +\a and \e in test subject lines. + +41. In an EBCDIC environment, \a in a pattern was converted to the ASCII +instead of the EBCDIC value. + +42. The handling of \c in an EBCDIC environment has been revised so that it is +now compatible with the specification in Perl's perlebcdic page. + +43. Single character repetition in JIT has been improved. 20-30% speedup +was achieved on certain patterns. + +44. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in +ASCII/Unicode. This has now been added to the list of characters that are +recognized as white space in EBCDIC. + +45. When PCRE2 was compiled without Unicode support, the use of \p and \P gave +an error (correctly) when used outside a class, but did not give an error +within a class. + +46. \h within a class was incorrectly compiled in EBCDIC environments. + +47. JIT should return with error when the compiled pattern requires +more stack space than the maximum. + +48. Fixed a memory leak in pcre2grep when a locale is set. + + +Version 10.10 06-March-2015 +--------------------------- + +1. When a pattern is compiled, it remembers the highest back reference so that +when matching, if the ovector is too small, extra memory can be obtained to +use instead. A conditional subpattern whose condition is a check on a capture +having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is +another kind of back reference, but it was not setting the highest +backreference number. 
This mattered only if pcre2_match() was called with an +ovector that was too small to hold the capture, and there was no other kind of +back reference (a situation which is probably quite rare). The effect of the +bug was that the condition was always treated as FALSE when the capture could +not be consulted, leading to incorrect behaviour by pcre2_match(). This bug +has been fixed. + +2. Functions for serialization and deserialization of sets of compiled patterns +have been added. + +3. The value that is returned by PCRE2_INFO_SIZE has been corrected to remove +excess code units at the end of the data block that may occasionally occur if +the code for calculating the size over-estimates. This change stops the +serialization code copying uninitialized data, to which valgrind objects. The +documentation of PCRE2_INFO_SIZE was incorrect in stating that the size did not +include the general overhead. This has been corrected. + +4. All code units in every slot in the table of group names are now set, again +in order to avoid accessing uninitialized data when serializing. + +5. The (*NO_JIT) feature is implemented. + +6. If a bug that caused pcre2_compile() to use more memory than allocated was +triggered when using valgrind, the code in (3) above passed a stupidly large +value to valgrind. This caused a crash instead of an "internal error" return. + +7. A reference to a duplicated named group (either a back reference or a test +for being set in a conditional) that occurred in a part of the pattern where +PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern +to be incorrectly calculated, leading to overwriting. + +8. A mutually recursive set of back references such as (\2)(\1) caused a +segfault at compile time (while trying to find the minimum matching length). +The infinite loop is now broken (with the minimum length unset, that is, zero). + +9. 
If an assertion that was used as a condition was quantified with a minimum +of zero, matching went wrong. In particular, if the whole group had unlimited +repetition and could match an empty string, a segfault was likely. The pattern +(?(?=0)?)+ is an example that caused this. Perl allows assertions to be +quantified, but not if they are being used as conditions, so the above pattern +is faulted by Perl. PCRE2 has now been changed so that it also rejects such +patterns. + +10. The error message for an invalid quantifier has been changed from "nothing +to repeat" to "quantifier does not follow a repeatable item". + +11. If a bad UTF string is compiled with NO_UTF_CHECK, it may succeed, but +scanning the compiled pattern in subsequent auto-possessification can get out +of step and lead to an unknown opcode. Previously this could have caused an +infinite loop. Now it generates an "internal error" error. This is a tidyup, +not a bug fix; passing bad UTF with NO_UTF_CHECK is documented as having an +undefined outcome. + +12. A UTF pattern containing a "not" match of a non-ASCII character and a +subroutine reference could loop at compile time. Example: /[^\xff]((?1))/. + +13. The locale test (RunTest 3) has been upgraded. It now checks that a locale +that is found in the output of "locale -a" can actually be set by pcre2test +before it is accepted. Previously, in an environment where a locale was listed +but would not set (an example does exist), the test would "pass" without +actually doing anything. Also the fr_CA locale has been added to the list of +locales that can be used. + +14. Fixed a bug in pcre2_substitute(). If a replacement string ended in a +capturing group number without parentheses, the last character was incorrectly +literally included at the end of the replacement string. + +15. 
A possessive capturing group such as (a)*+ with a minimum repeat of zero +failed to allow the zero-repeat case if pcre2_match() was called with an +ovector too small to capture the group. + +16. Improved error message in pcre2test when setting the stack size (-S) fails. + +17. Fixed two bugs in CMakeLists.txt: (1) Some lines had got lost in the +transfer from PCRE1, meaning that CMake configuration failed if "build tests" +was selected. (2) The file src/pcre2_serialize.c had not been added to the list +of PCRE2 sources, which caused a failure to build pcre2test. + +18. Fixed typo in pcre2_serialize.c (DECL instead of DEFN) that causes problems +only on Windows. + +19. Use binary input when reading back saved serialized patterns in pcre2test. + +20. Added RunTest.bat for running the tests under Windows. + +21. "make distclean" was not removing config.h, a file that may be created for +use with CMake. + +22. A pattern such as "((?2){0,1999}())?", which has a group containing a +forward reference repeated a large (but limited) number of times within a +repeated outer group that has a zero minimum quantifier, caused incorrect code +to be compiled, leading to the error "internal error: previously-checked +referenced subpattern not found" when an incorrect memory address was read. +This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's +FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.) + +23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine +call within a group that also contained a recursive back reference caused +incorrect code to be compiled. This bug was reported as "heap overflow", +discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015: +CVE-2015-2326 was given to this.) + +24. Computing the size of the JIT read-only data in advance has been a source +of various issues, and new ones still appear, unfortunately. 
To fix +existing and future issues, size computation is eliminated from the code, +and replaced by on-demand memory allocation. + +25. A pattern such as /(?i)[A-`]/, where characters in the other case are +adjacent to the end of the range, and the range contained characters with more +than one other case, caused incorrect behaviour when compiled in UTF mode. In +that example, the range a-j was left out of the class. + + +Version 10.00 05-January-2015 +----------------------------- + +Version 10.00 is the first release of PCRE2, a revised API for the PCRE +library. Changes prior to 10.00 are logged in the ChangeLog file for the old +API, up to item 20 for release 8.36. + +The code of the library was heavily revised as part of the new API +implementation. Details of each and every modification were not individually +logged. In addition to the API changes, the following changes were made. They +are either new functionality, or bug fixes and other noticeable changes of +behaviour that were implemented after the code had been forked. + +1. Including Unicode support at build time is now enabled by default, but it +can optionally be disabled. It is not enabled by default at run time (no +change). + +2. The test program, now called pcre2test, was re-specified and almost +completely re-written. Its input is not compatible with input for pcretest. + +3. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the +PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is +matched by that pattern. + +4. For the benefit of those who use PCRE2 via some other application, that is, +not writing the function calls themselves, it is possible to check the PCRE2 +version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a +string such as "yesno". + +5. There are case-equivalent Unicode characters whose encodings use different +numbers of code units in UTF-8. U+023A and U+2C65 are one example. 
(It is +theoretically possible for this to happen in UTF-16 too.) If a backreference to +a group containing one of these characters was greedily repeated, and during +the match a backtrack occurred, the subject might be backtracked by the wrong +number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly +(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should +capture the final character, which is the three bytes E2, B1, and A5 in UTF-8. +Incorrect backtracking meant that group 2 captured only the last two bytes. +This bug has been fixed; the new code is slower, but it is used only when the +strings matched by the repetition are not all the same length. + +6. A pattern such as /()a/ was not setting the "first character must be 'a'" +information. This applied to any pattern with a group that matched no +characters, for example: /(?:(?=.)|(? start of atomic group +META_CIRCUMFLEX ^ metacharacter +META_CLASS [ start of non-empty class +META_CLASS_EMPTY [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS +META_CLASS_EMPTY_NOT [^] negative empty class - ditto +META_CLASS_END ] end of non-empty class +META_CLASS_NOT [^ start non-empty negative class +META_COMMIT (*COMMIT) - no argument (see below for with argument) +META_COND_ASSERT (?(?assertion) +META_DOLLAR $ metacharacter +META_DOT . metacharacter +META_END End of pattern (this value is 0x80000000) +META_FAIL (*FAIL) +META_KET ) closing parenthesis +META_LOOKAHEAD (?= start of lookahead +META_LOOKAHEAD_NA (*napla: start of non-atomic lookahead +META_LOOKAHEADNOT (?! start of negative lookahead +META_NOCAPTURE (?: no capture parens +META_PLUS + +META_PLUS_PLUS ++ +META_PLUS_QUERY +? +META_PRUNE (*PRUNE) - no argument (see below for with argument) +META_QUERY ? +META_QUERY_PLUS ?+ +META_QUERY_QUERY ?? 
+META_RANGE_ESCAPED hyphen in class range with at least one escape +META_RANGE_LITERAL hyphen in class range defined literally +META_SKIP (*SKIP) - no argument (see below for with argument) +META_THEN (*THEN) - no argument (see below for with argument) +META_ECLASS_AND && (or &) in an extended character class +META_ECLASS_OR || (or |, +) in an extended character class +META_ECLASS_SUB -- (or -) in an extended character class +META_ECLASS_XOR ~~ (or ^) in an extended character class +META_ECLASS_NOT ! in an extended character class + +The two RANGE values occur only in character classes. They are positioned +between two literals that define the start and end of the range. In an EBCDIC +environment it is necessary to know whether either of the range values was +specified as an escape. In an ASCII/Unicode environment the distinction is not +relevant. + +The following have data in the lower 16 bits, and may be followed by other data +elements: + +META_ALT | alternation +META_BACKREF back reference +META_CAPTURE start of capturing group +META_ESCAPE non-literal escape sequence +META_RECURSE recursion call + +If the data for META_ALT is non-zero, it is inside a lookbehind, and the data +is the maximum length of its branch (see META_LOOKBEHIND below for more +detail). + +META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as +their data in the lower 16 bits of the element. META_RECURSE is followed by an +offset, for use in error messages. + +META_BACKREF is followed by an offset if the back reference group number is 10 +or more. The offsets of the first occurrences of references to groups whose +numbers are less than 10 are put in cb->small_ref_offset[] (only the first +occurrence is useful). On 64-bit systems this avoids using more than two parsed +pattern elements for items such as \3. The offset is used when an error occurs +because the reference is to a non-existent group. + +META_ESCAPE is used for escapes such as \d that match a character. 
It has an +ESC_xxx value as its data. For ESC_P and ESC_p, the next element contains the +16-bit type and data property values, packed together. Escape sequences such as +\g and \k are turned into other items like META_RECURSE or META_BACKREF and +their ESC_xxx values never occur with META_ESCAPE. + +The following have one data item that follows in the next vector element: + +META_BIGVALUE Next is a literal >= META_END +META_POSIX POSIX class item (data identifies the class) +META_POSIX_NEG negative POSIX class item (ditto) + +The following are followed by a length element, then a number of character code +values (which should match with the length): + +META_MARK (*MARK:xxxx) +META_COMMIT_ARG )*COMMIT:xxxx) +META_PRUNE_ARG (*PRUNE:xxx) +META_SKIP_ARG (*SKIP:xxxx) +META_THEN_ARG (*THEN:xxxx) + +The following are followed by a length element, then an offset in the pattern +that identifies the name: + +META_COND_NAME (?() or (?('name') or (?(name) +META_COND_RNAME (?(R&name) +META_COND_RNUMBER (?(Rdigits) +META_RECURSE_BYNAME (?&name) +META_BACKREF_BYNAME \k'name' or \k or \k{name} or \g{name} +META_SCS_NAME (*scs:()...) + +META_COND_RNUMBER is used for names that start with R and continue with digits, +because this is an ambiguous case. It could be a back reference to a group with +that name, or it could be a recursion test on a numbered group. + +These are followed by an offset, for use in error messages, then a number: + +META_COND_NUMBER (?([+-]digits) +META_SCS_NUMBER (*scs:(digits)...) + +The following is followed just by an offset, for use in error messages: + +META_COND_DEFINE (?(DEFINE) + +The following are at first also followed just by an offset for use in error +messages. After the lengths of the branches of a lookbehind group have been +checked the error offset is no longer needed. 
The lower 16 bits of the main +word are now set to the maximum length of the first branch of the lookbehind +group, and the second word is set to the minimum matching length for a +variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose +branches are all of fixed length. These values are used when generating +OP_REVERSE or OP_VREVERSE for the first branch. The miminum value is also used +for any subsequent branches because there is only room for one value (the +branch maximum length) in a META_ALT item. + +META_LOOKBEHIND (?<= start of lookbehind +META_LOOKBEHIND_NA (*naplb: start of non-atomic lookbehind +META_LOOKBEHINDNOT (?' and 1 for '>='; +the next two are the major and minor numbers: + +META_COND_VERSION (?(VERSIONx.y) + +Callouts are converted into one of two items: + +META_CALLOUT_NUMBER (?C with numerical argument +META_CALLOUT_STRING (?C with string argument + +In both cases, the next two elements contain the offset and length of the next +item in the pattern. Then there is either one callout number, or a length and +an offset for the string argument. The length includes both delimiters. + + +Traditional matching function +----------------------------- + +The "traditional", and original, matching function is called pcre2_match(), and +it implements an NFA algorithm, similar to the original Henry Spencer algorithm +and the way that Perl works. This is not surprising, since it is intended to be +as compatible with Perl as possible. This is the function most users of PCRE2 +will use most of the time. If PCRE2 is compiled with just-in-time (JIT) +support, and studying a compiled pattern with JIT is successful, the JIT code +is run instead of the normal pcre2_match() code, but the result is the same. + +The interpreter used to implement backtracking by means of recursive function +calls, but this gave rise to regular complaints when patterns with large search +trees ran out of stack. 
There was for a while a fudge that used the heap +instead, but this was inefficient and slow. In 2017 I re-wrote pcre2_match() as +a single, non-recursive function that implements backtracking via a vector of +"frames" on the heap, each frame representing a backtracking point. As well as +standard information such as the position in the pattern and position in the +subject, each frame has a number of unassigned variables that can be used +locally to preserve values at a backtracking point. C macros are used +extensively to implement all of this. + + +Supplementary matching function +------------------------------- + +There is a supplementary matching function called pcre2_dfa_match() that +implements a DFA matching algorithm that searches simultaneously for all +possible matches that start at one point in the subject string. (Going back to +my roots: see Historical Note 1 above.) This function intreprets the same +compiled pattern data as pcre2_match(); however, not all the facilities are +available, and those that are do not always work in quite the same way. In +particular, capturing parentheses and backreferences are not supported. See the +user documentation for details. + +The algorithm that is used for pcre2_dfa_match() is not a traditional FSM, +because it may have a number of states active at one time. More work would be +needed at compile time to produce a traditional FSM where only one state is +ever active at once. I believe some other regex matchers work this way. JIT +support is not available for this kind of matching. + + +Changeable options +------------------ + +The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and +some others may be changed in the middle of patterns by items such as (?i). +Their processing is handled entirely at compile time by generating different +opcodes for the different settings. 
Some options are copied into the opcode's +data, for opcodes such as OP_REFI which depends on the (?r) +(PCRE2_EXTRA_CASELESS_RESTRICT) option. The runtime functions do not need to +keep track of an option's state. + +PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE +are tracked and processed during the parsing pre-pass. The others are handled +from META_OPTIONS items during the main compile phase. + + +Format of compiled patterns +--------------------------- + +The compiled form of a pattern is a vector of unsigned code units (bytes in +8-bit mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing +items of variable length. The first code unit in an item contains an opcode, +and the length of the item is either implicit in the opcode or contained in the +data that follows it. + +In many cases listed below, LINK_SIZE data values are specified for offsets +within the compiled pattern. LINK_SIZE always specifies a number of bytes. The +default value for LINK_SIZE is 2, except for the 32-bit library, where it can +only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values, +and the 16-bit library can be compiled to use 4-byte values, though this +impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries +is necessary only when patterns whose compiled length is greater than 65535 +code units are going to be processed. When a LINK_SIZE value uses more than one +code unit, the most significant unit is first. + +In this description, we assume the "normal" compilation options. Data values +that are counts (e.g. quantifiers) are always two bytes long in 8-bit mode +(most significant byte first), and one code unit in 16-bit and 32-bit modes. 
+ + +Opcodes with no following data +------------------------------ + +These items are all just one code unit long: + + OP_END end of pattern + OP_ANY match any one character other than newline + OP_ALLANY match any one character, including newline + OP_ANYBYTE match any single code unit, even in UTF-8/16 mode + OP_SOD match start of data: \A + OP_SOM, start of match (subject + offset): \G + OP_SET_SOM, set start of match (\K) + OP_CIRC ^ (start of data) + OP_CIRCM ^ multiline mode (start of data or after newline) + OP_NOT_WORD_BOUNDARY \W + OP_WORD_BOUNDARY \w + OP_NOT_DIGIT \D + OP_DIGIT \d + OP_NOT_HSPACE \H + OP_HSPACE \h + OP_NOT_WHITESPACE \S + OP_WHITESPACE \s + OP_NOT_VSPACE \V + OP_VSPACE \v + OP_NOT_WORDCHAR \W + OP_WORDCHAR \w + OP_EODN match end of data or newline at end: \Z + OP_EOD match end of data: \z + OP_DOLL $ (end of data, or before final newline) + OP_DOLLM $ multiline mode (end of data or before newline) + OP_EXTUNI match an extended Unicode grapheme cluster + OP_ANYNL match any Unicode newline sequence + + OP_ASSERT_ACCEPT ) + OP_ACCEPT ) These are Perl 5.10's "backtracking control + OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing + OP_FAIL ) parentheses, it may be preceded by one or more + OP_PRUNE ) OP_CLOSE, each followed by a number that + OP_SKIP ) indicates which parentheses must be closed. + OP_THEN ) + +OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. +This ends the assertion, not the entire pattern match. The assertion (?!) is +always optimized to OP_FAIL. + +OP_ALLANY is used for '.' when PCRE2_DOTALL is set. It is also used for \C in +non-UTF modes and in UTF-32 mode (since one code unit still equals one +character). Another use is for [^] when empty classes are permitted +(PCRE2_ALLOW_EMPTY_CLASS is set). + + +Backtracking control verbs +-------------------------- + +Verbs with no arguments generate opcodes with no following data (as listed +in the section above). 
+ +(*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a +length in one code unit, and followed by a binary zero. The name length is +limited by the size of the code unit. + +(*ACCEPT:NAME) and (*FAIL:NAME) are compiled as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL) respectively. + +For (*COMMIT:NAME), (*PRUNE:NAME), (*SKIP:NAME), and (*THEN:NAME), the opcodes +OP_COMMIT_ARG, OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the +name following in the same format as for OP_MARK. + + +Matching literal characters +--------------------------- + +The OP_CHAR opcode is followed by a single character that is to be matched +casefully. For caseless matching of characters that have at most two +case-equivalent code points, OP_CHARI is used. In UTF-8 or UTF-16 modes, the +character may be more than one code unit long. In UTF-32 mode, characters are +always exactly one code unit long. + +If there is only one character in a character class, OP_CHAR or OP_CHARI is +used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is, +for something like [^a]). + +Caseless matching (positive or negative) of characters that have more than two +case-equivalent code points (which is possible only in UTF mode) is handled by +compiling a Unicode property item (see below), with the pseudo-property +PT_CLIST. The value of this property is an offset in a vector called +"ucd_caseless_sets" which identifies the start of a short list of case +equivalent characters, terminated by the value NOTACHAR (0xffffffff). 
+ + +Repeating single characters +--------------------------- + +The common repeats (*, +, ?), when applied to a single character, use the +following opcodes, which come in caseful and caseless versions: + + Caseful Caseless + OP_STAR OP_STARI + OP_MINSTAR OP_MINSTARI + OP_POSSTAR OP_POSSTARI + OP_PLUS OP_PLUSI + OP_MINPLUS OP_MINPLUSI + OP_POSPLUS OP_POSPLUSI + OP_QUERY OP_QUERYI + OP_MINQUERY OP_MINQUERYI + OP_POSQUERY OP_POSQUERYI + +Each opcode is followed by the character that is to be repeated. In ASCII or +UTF-32 modes, these are two-code-unit items; in UTF-8 or UTF-16 modes, the +length is variable. Those with "MIN" in their names are the minimizing +versions. Those with "POS" in their names are possessive versions. Other kinds +of repeat make use of these opcodes: + + Caseful Caseless + OP_UPTO OP_UPTOI + OP_MINUPTO OP_MINUPTOI + OP_POSUPTO OP_POSUPTOI + OP_EXACT OP_EXACTI + +Each of these is followed by a count and then the repeated character. The count +is two bytes long in 8-bit mode (most significant byte first), or one code unit +in 16-bit and 32-bit modes. + +OP_UPTO matches from 0 to the given number. A repeat with a non-zero minimum +and a fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or +OP_MINUPTO or OPT_POSUPTO). + +Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI, +etc.) are used for repeated, negated, single-character classes such as [^a]*. +The normal single-character opcodes (OP_STAR, etc.) are used for repeated +positive single-character classes. + + +Repeating character types +------------------------- + +Repeats of things like \d are done exactly as for single characters, except +that instead of a character, the opcode for the type (e.g. OP_DIGIT) is stored +in the next code unit. 
The opcodes are: + + OP_TYPESTAR + OP_TYPEMINSTAR + OP_TYPEPOSSTAR + OP_TYPEPLUS + OP_TYPEMINPLUS + OP_TYPEPOSPLUS + OP_TYPEQUERY + OP_TYPEMINQUERY + OP_TYPEPOSQUERY + OP_TYPEUPTO + OP_TYPEMINUPTO + OP_TYPEPOSUPTO + OP_TYPEEXACT + + +Match by Unicode property +------------------------- + +OP_PROP and OP_NOTPROP are used for positive and negative matches of a +character by testing its Unicode property (the \p and \P escape sequences). +Each is followed by two code units that encode the desired property as a type +and a value. The types are a set of #defines of the form PT_xxx, and the values +are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file. +The value is relevant only for PT_GC (General Category), PT_PC (Particular +Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property), +and the pseudo-property PT_CLIST, which is used to identify a list of +case-equivalent characters when there are three or more (see above). + +Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by +three code units: OP_PROP or OP_NOTPROP, and then the desired property type and +value. + + +Character classes +----------------- + +If there is only one character in a class, OP_CHAR or OP_CHARI is used for a +positive class, and OP_NOT or OP_NOTI for a negative one (that is, for +something like [^a]), except when caselessly matching a character that has more +than two case-equivalent code points (which can happen only in UTF mode). In +this case a Unicode property item is used, as described above in "Matching +literal characters". + +A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated, +negated, single-character classes. The normal single-character opcodes +(OP_STAR, etc.) are used for repeated positive single-character classes. + +When there is more than one character in a class, and all the code points are +less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a +negative one. 
In either case, the opcode is followed by a 32-byte (16-short, +8-word) bit map containing a 1 bit for every character that is acceptable. The +bits are counted from the least significant end of each unit. In caseless mode, +bits for both cases are set. + +The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 and +16-bit and 32-bit modes, subject characters with values greater than 255 can be +handled correctly. For OP_CLASS they do not match, whereas for OP_NCLASS they +do. + +For classes containing characters with values greater than 255 or that contain +\p or \P, OP_XCLASS is used. It optionally uses a bit map if any acceptable +code points are less than 256. After the bit map, the properties of the +character class are listed, if they are present. The items in the list +follows the declaration order of the pattern string. The property list +is followed by single characters and/or character ranges, if they are +present. The characters/ranges are sorted in ascending order, and at +least one non-matching character must be present between any two of +them. In caseless mode, all equivalent characters are explicitly listed. + +OP_XCLASS is followed by a LINK_SIZE value containing the total length of the +opcode and its data. This is followed by a code unit containing flag bits: +XCL_NOT indicates that this is a negative class, and XCL_MAP indicates that a +bit map is present. There follows the bit map, if XCL_MAP is set, and then a +sequence of items coded as follows: + + XCL_END marks the end of the list + XCL_SINGLE one character follows + XCL_RANGE two characters follow + XCL_PROP a Unicode property (type, value) follows + XCL_NOTPROP a Unicode property (type, value) follows + +If a range starts with a code point less than 256 and ends with one greater +than 255, it is split into two ranges, with characters less than 256 being +indicated in the bit map, and the rest with XCL_RANGE. 
+ +When XCL_NOT is set, the bit map, if present, contains bits for characters that +are allowed (exactly as for OP_NCLASS), but the list of items that follow it +specifies characters and properties that are not allowed. + +The meaning of the bitmap indicated by XCL_MAP is that, if one is present, then +it fully describes which code points < 256 match the class (without needing to +invert the check according to XCL_NOT); the other items in the OP_XCLASS need +not be consulted. However, if a bitmap is not present, then code points < 256 +may still match, so the other items in the OP_XCLASS must be consulted. + +For classes containing logical expressions, such as "[\p{Greek} && \p{Lu}]" for +"uppercase Greek letters", OP_ECLASS is used. The expression is encoded as a a +stack-based series of operands and operators, in Reverse Polish Notation. Like +an OP_XCLASS, the OP_ECLASS is first followed by a LINK_SIZE value containing +the total length of the opcode and its data. That is followed by a code unit +containing flags: currently just ECL_MAP indicating that a bit map is present. +There follows the bit map, if ECL_MAP is set. Finally, there is a sequence of +items that are either an operand or operator. Each item starts with a single +code unit containing its type: + + ECL_AND AND; no additional data + ECL_OR OR; no additional data + ECL_XOR XOR; no additional data + ECL_NOT NOT; no additional data + ECL_XCLASS The additional data which follows ECL_XCLASS is the same as for + an OP_XCLASS, except that this data is preceded by ECL_XCLASS + rather than OP_XCLASS. + Because the OP_ECLASS has its own bitmap (if required), an + ECL_XCLASS should not contain a bitmap. + +Additionally, there are two intermediate values used during compilation, but +these are folded away during generation of the opcode, and so never appear +inside an OP_ECLASS at match time. 
They are: + + ECL_ANY match all characters; no additional data + ECL_NONE match no characters; no additional data + +The meaning of the bitmap indicated by ECL_MAP is the same as XCL_MAP. +If the bitmap is present, all codepoints < 256 are checked against the bitmap. + + +Back references +--------------- + +OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the +reference number when the reference is to a unique capturing group (either by +number or by name). When named groups are used, there may be more than one +group with the same name. In this case, a reference to such a group by name +generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index +(not the byte offset) in the group name table of the first entry for the +required name, followed by the number of groups with the same name. The +matching code can then search for the first one that is set. + +OP_REFI and OP_DNREFI are further followed by an item containing any +case-insensitivity flags. + + +Repeating character classes and back references +----------------------------------------------- + +Single-character classes are handled specially (see above). This section +applies to other classes and also to back references. In both cases, the repeat +information follows the base item. The matching code looks at the following +opcode to see if it is one of these: + + OP_CRSTAR + OP_CRMINSTAR + OP_CRPOSSTAR + OP_CRPLUS + OP_CRMINPLUS + OP_CRPOSPLUS + OP_CRQUERY + OP_CRMINQUERY + OP_CRPOSQUERY + OP_CRRANGE + OP_CRMINRANGE + OP_CRPOSRANGE + +All but the last three are single-code-unit items, with no data. The range +opcodes are followed by the minimum and maximum repeat counts. + + +Brackets and alternation +------------------------ + +A pair of non-capturing round brackets is wrapped round each expression at +compile time, so alternation always happens in the context of brackets. 
+ +[Note for North Americans: "bracket" to some English speakers, including +myself, can be round, square, curly, or pointy. Hence this usage rather than +"parentheses".] + +Non-capturing brackets use the opcode OP_BRA, capturing brackets use OP_CBRA. A +bracket opcode is followed by a LINK_SIZE value which gives the offset to the +next alternative OP_ALT or, if there aren't any branches, to the terminating +opcode. Each OP_ALT is followed by a LINK_SIZE value giving the offset to the +next one, or to the final opcode. For capturing brackets, the bracket number is +a count that immediately follows the offset. + +There are several opcodes that mark the end of a subpattern group. OP_KET is +used for subpatterns that do not repeat indefinitely, OP_KETRMIN and +OP_KETRMAX are used for indefinite repetitions, minimally or maximally +respectively, and OP_KETRPOS for possessive repetitions (see below for more +details). All four are followed by a LINK_SIZE value giving (as a positive +number) the offset back to the matching opening bracket opcode. + +If a subpattern is quantified such that it is permitted to match zero times, it +is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are +single-unit opcodes that tell the matcher that skipping the following +subpattern entirely is a valid match. In the case of the first two, not +skipping the pattern is also valid (greedy and non-greedy). The third is used +when a pattern has the quantifier {0,0}. It cannot be entirely discarded, +because it may be called as a subroutine from elsewhere in the pattern. + +A subpattern with an indefinite maximum repetition is replicated in the +compiled data its minimum number of times (or once with OP_BRAZERO if the +minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX +as appropriate. 
+ +A subpattern with a bounded maximum repetition is replicated in a nested +fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO +before each replication after the minimum, so that, for example, (abc){2,5} is +compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group +has the same number. + +When a repeated subpattern has an unbounded upper limit, it is checked to see +whether it could match an empty string. If this is the case, the opcode in the +final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher +that it needs to check for matching an empty string when it hits OP_KETRMIN or +OP_KETRMAX, and if so, to break the loop. + + +Possessive brackets +------------------- + +When a repeated group (capturing or non-capturing) is marked as possessive by +the "+" notation, e.g. (abc)++, different opcodes are used. Their names all +have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead +of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum +repetition is zero, the group is preceded by OP_BRAPOSZERO. + + +Once-only (atomic) groups +------------------------- + +These are just like other subpatterns, but they start with the opcode OP_ONCE. +The check for matching an empty string in an unbounded repeat is handled +entirely at runtime, so there is just this one opcode for atomic groups. + + +Assertions +---------- + +Forward assertions are also just like other subpatterns, but starting with one +of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or +OP_ASSERT_NOT. + +Backward assertions use the opcodes OP_ASSERTBACK, OP_ASSERTBACK_NA, and +OP_ASSERTBACK_NOT. 
If all the branches of a backward assertion are of fixed +length (not necessarily the same), the first opcode inside each branch is +OP_REVERSE, followed by an IMM2_SIZE count of the number of characters to move +back the pointer in the subject string, thus allowing each branch to have a +different (but fixed) length. + +Variable-length backward assertions whose maximum matching length is limited +are also supported. For such assertions, the first opcode inside each branch is +OP_VREVERSE, followed by the minimum and maximum lengths for that branch, +unless these happen to be equal, in which case OP_REVERSE is used. These +IMM2_SIZE values occupy two code units each in 8-bit mode, and 1 code unit in +16/32 bit modes. + +In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are +also the number of code units, but in UTF-8/16 mode each character may occupy +more than one code unit. + +The "scan substring" assertion compiles as OP_ASSERT_SCS. This opcode is +followed by a list of arguments. Each argument is either an OP_CREF or +OP_DNCREF byte code sequence. The details of these sequences are described +in the next section. + +For example (*scs:(1,'NAME')...PATTERN...) is translated to: +[OP_ASSERT_SCS] [OP_CREF] [OP_CREF] ...PATTERN... [OP_KET] + +If 'NAME' is a duplicated name, the second [OP_CREF] is [OP_DNCREF] instead. + + +Conditional subpatterns +----------------------- + +These are like other subpatterns, but they start with the opcode OP_COND, or +OP_SCOND for one that might match an empty string in an unbounded repeat. + +If the condition is a back reference, this is stored at the start of the +subpattern using the opcode OP_CREF followed by a count containing the +reference number, provided that the reference is to a unique capturing group. +If the reference was by name and there is more than one group with that name, +OP_DNCREF is used instead. 
It is followed by two counts: the index in the group +names table, and the number of groups with the same name. The allows the +matcher to check if any group with the given name is set. + +If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of +group x" (coded as "(?(Rx)"), the group number is stored at the start of the +subpattern using the opcode OP_RREF (with a value of RREF_ANY (0xffff) for "the +whole pattern") or OP_DNRREF (with data as for OP_DNCREF). + +For a DEFINE condition, OP_FALSE is used (with no associated data). During +compilation, however, a DEFINE condition is coded as OP_DEFINE so that, when +the conditional group is complete, there can be a check to ensure that it +contains only one top-level branch. Once this has happened, the opcode is +changed to OP_FALSE, so the matcher never sees OP_DEFINE. + +There is a special PCRE2-specific condition of the form (VERSION[>]=x.y), which +tests the PCRE2 version number. This compiles into one of the opcodes OP_TRUE +or OP_FALSE. + +If a condition is not a back reference, recursion test, DEFINE, or VERSION, it +must start with a parenthesized atomic assertion, whose opcode normally +immediately follows OP_COND or OP_SCOND. However, if automatic callouts are +enabled, a callout is inserted immediately before the assertion. It is also +possible to insert a manual callout at this point. Only assertion conditions +may have callouts preceding the condition. + +A condition that is the negative assertion (?!) is optimized to OP_FAIL in all +parts of the pattern, so this is another opcode that may appear as a condition. +It is treated the same as OP_FALSE. + + +Recursion +--------- + +Recursion either matches the current pattern, or some subexpression. The opcode +OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting +bracket from the start of the whole pattern. OP_RECURSE is also used for +"subroutine" calls, even though they are not strictly a recursion. 
Up till +release 10.30 recursions were treated as atomic groups, making them +incompatible with Perl (but PCRE had them well before Perl did). From 10.30, +backtracking into recursions is supported. + +Repeated recursions used to be wrapped inside OP_ONCE brackets, which not only +forced no backtracking, but also allowed repetition to be handled as for other +bracketed groups. From 10.30 onwards, repeated recursions are duplicated for +their minimum repetitions, and then wrapped in non-capturing brackets for the +remainder. For example, (?1){3} is treated as (?1)(?1)(?1), and (?1){2,4} is +treated as (?1)(?1)(?:(?1)){0,2}. + + +Callouts +-------- + +A callout may have either a numerical argument or a string argument. These use +OP_CALLOUT or OP_CALLOUT_STR, respectively. In each case these are followed by +two LINK_SIZE values giving the offset in the pattern string to the start of +the following item, and another count giving the length of this item. These +values make it possible for pcre2test to output useful tracing information +using callouts. + +In the case of a numeric callout, after these two values there is a single code +unit containing the callout number, in the range 0-255, with 255 being used for +callouts that are automatically inserted as a result of the PCRE2_AUTO_CALLOUT +option. Thus, this opcode item is of fixed length: + + [OP_CALLOUT] [PATTERN_OFFSET] [PATTERN_LENGTH] [NUMBER] + +For callouts with string arguments, OP_CALLOUT_STR has three more data items: +a LINK_SIZE value giving the complete length of the entire opcode item, a +LINK_SIZE item containing the offset within the pattern string to the start of +the string argument, and the string itself, preceded by its starting delimiter +and followed by a binary zero. When a callout function is called, a pointer to +the actual string is passed, but the delimiter can be accessed as string[-1] if +the application needs it. 
In the 8-bit library, the callout in /X(?C'abc')Y/ is +compiled as the following bytes (decimal numbers represent binary values): + + [OP_CALLOUT_STR] [0] [10] [0] [1] [0] [14] [0] [5] ['] [a] [b] [c] [0] + -------- ------- -------- ------- + | | | | + ------- LINK_SIZE items ------ + +Opcode table checking +--------------------- + +The last opcode that is defined in pcre2_internal.h is OP_TABLE_LENGTH. This is +not a real opcode, but is used to check at compile time that tables indexed by +opcode are the correct length, in order to catch updating errors. + + +See also +-------- + +The file maint/README contains additional information. + + +Philip Hazel +August 2024 diff --git a/3rd/pcre2/INSTALL b/3rd/pcre2/INSTALL new file mode 100644 index 00000000..e82fd21d --- /dev/null +++ b/3rd/pcre2/INSTALL @@ -0,0 +1,368 @@ +Installation Instructions +************************* + + Copyright (C) 1994-1996, 1999-2002, 2004-2017, 2020-2021 Free +Software Foundation, Inc. + + Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. This file is offered as-is, +without warranty of any kind. + +Basic Installation +================== + + Briefly, the shell command './configure && make && make install' +should configure, build, and install this package. The following +more-detailed instructions are generic; see the 'README' file for +instructions specific to this package. Some packages provide this +'INSTALL' file but do not implement all of the features documented +below. The lack of an optional feature in a given package is not +necessarily a bug. More recommendations for GNU packages can be found +in *note Makefile Conventions: (standards)Makefile Conventions. + + The 'configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. 
It uses +those values to create a 'Makefile' in each directory of the package. +It may also create one or more '.h' files containing system-dependent +definitions. Finally, it creates a shell script 'config.status' that +you can run in the future to recreate the current configuration, and a +file 'config.log' containing compiler output (useful mainly for +debugging 'configure'). + + It can also use an optional file (typically called 'config.cache' and +enabled with '--cache-file=config.cache' or simply '-C') that saves the +results of its tests to speed up reconfiguring. Caching is disabled by +default to prevent problems with accidental use of stale cache files. + + If you need to do unusual things to compile the package, please try +to figure out how 'configure' could check whether to do them, and mail +diffs or instructions to the address given in the 'README' so they can +be considered for the next release. If you are using the cache, and at +some point 'config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file 'configure.ac' (or 'configure.in') is used to create +'configure' by a program called 'autoconf'. You need 'configure.ac' if +you want to change it or regenerate 'configure' using a newer version of +'autoconf'. + + The simplest way to compile this package is: + + 1. 'cd' to the directory containing the package's source code and type + './configure' to configure the package for your system. + + Running 'configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type 'make' to compile the package. + + 3. Optionally, type 'make check' to run any self-tests that come with + the package, generally using the just-built uninstalled binaries. + + 4. Type 'make install' to install the programs and any data files and + documentation. 
When installing into a prefix owned by root, it is + recommended that the package be configured and built as a regular + user, and only the 'make install' phase executed with root + privileges. + + 5. Optionally, type 'make installcheck' to repeat any self-tests, but + this time using the binaries in their final installed location. + This target does not install anything. Running this target as a + regular user, particularly if the prior 'make install' required + root privileges, verifies that the installation completed + correctly. + + 6. You can remove the program binaries and object files from the + source code directory by typing 'make clean'. To also remove the + files that 'configure' created (so you can compile the package for + a different kind of computer), type 'make distclean'. There is + also a 'make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 7. Often, you can also type 'make uninstall' to remove the installed + files again. In practice, not all packages have tested that + uninstallation works correctly, even though it is required by the + GNU Coding Standards. + + 8. Some packages, particularly those that use Automake, provide 'make + distcheck', which can be used by developers to test that all other + targets like 'make install' and 'make uninstall' work correctly. + This target is generally not run by end users. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the 'configure' script does not know about. Run './configure --help' +for details on some of the pertinent environment variables. + + You can give 'configure' initial values for configuration parameters +by setting variables in the command line or in the environment.
Here is +an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU 'make'. 'cd' to the +directory where you want the object files and executables to go and run +the 'configure' script. 'configure' automatically checks for the source +code in the directory that 'configure' is in and in '..'. This is known +as a "VPATH" build. + + With a non-GNU 'make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use 'make distclean' before +reconfiguring for another architecture. + + On MacOS X 10.5 and later systems, you can create libraries and +executables that work on multiple system types--known as "fat" or +"universal" binaries--by specifying multiple '-arch' options to the +compiler but only a single '-arch' option to the preprocessor. Like +this: + + ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ + CPP="gcc -E" CXXCPP="g++ -E" + + This is not guaranteed to produce working output in all cases, you +may have to build one architecture at a time and combine the results +using the 'lipo' tool if you have problems. + +Installation Names +================== + + By default, 'make install' installs the package's commands under +'/usr/local/bin', include files under '/usr/local/include', etc. You +can specify an installation prefix other than '/usr/local' by giving +'configure' the option '--prefix=PREFIX', where PREFIX must be an +absolute file name. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. 
If you +pass the option '--exec-prefix=PREFIX' to 'configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like '--bindir=DIR' to specify different values for particular +kinds of files. Run 'configure --help' for a list of the directories +you can set and what kinds of files go in them. In general, the default +for these options is expressed in terms of '${prefix}', so that +specifying just '--prefix' will affect all of the other directory +specifications that were not explicitly provided. + + The most portable way to affect installation locations is to pass the +correct locations to 'configure'; however, many packages provide one or +both of the following shortcuts of passing variable assignments to the +'make install' command line to change installation locations without +having to reconfigure or recompile. + + The first method involves providing an override variable for each +affected directory. For example, 'make install +prefix=/alternate/directory' will choose an alternate location for all +directory configuration variables that were expressed in terms of +'${prefix}'. Any directories that were specified during 'configure', +but not in terms of '${prefix}', must each be overridden at install time +for the entire installation to be relocated. The approach of makefile +variable overrides for each directory variable is required by the GNU +Coding Standards, and ideally causes no recompilation. However, some +platforms have known limitations with the semantics of shared libraries +that end up requiring recompilation when using this method, particularly +noticeable in packages that use GNU Libtool. + + The second method involves providing the 'DESTDIR' variable. For +example, 'make install DESTDIR=/alternate/directory' will prepend +'/alternate/directory' before all installation names. 
The approach of +'DESTDIR' overrides is not required by the GNU Coding Standards, and +does not work on platforms that have drive letters. On the other hand, +it does better at avoiding recompilation issues, and works well even +when some directory options were not specified in terms of '${prefix}' +at 'configure' time. + +Optional Features +================= + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving 'configure' the +option '--program-prefix=PREFIX' or '--program-suffix=SUFFIX'. + + Some packages pay attention to '--enable-FEATURE' options to +'configure', where FEATURE indicates an optional part of the package. +They may also pay attention to '--with-PACKAGE' options, where PACKAGE +is something like 'gnu-as' or 'x' (for the X Window System). The +'README' should mention any '--enable-' and '--with-' options that the +package recognizes. + + For packages that use the X Window System, 'configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the 'configure' options '--x-includes=DIR' and +'--x-libraries=DIR' to specify their locations. + + Some packages offer the ability to configure how verbose the +execution of 'make' will be. For these packages, running './configure +--enable-silent-rules' sets the default to minimal output, which can be +overridden with 'make V=1'; while running './configure +--disable-silent-rules' sets the default to verbose, which can be +overridden with 'make V=0'. + +Particular systems +================== + + On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC +is not installed, it is recommended to use the following options in +order to use an ANSI C compiler: + + ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" + +and if that doesn't work, install pre-built binaries of GCC for HP-UX. 
+ + HP-UX 'make' updates targets which have the same timestamps as their +prerequisites, which makes it generally unusable when shipped generated +files such as 'configure' are involved. Use GNU 'make' instead. + + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot +parse its '<wchar.h>' header file. The option '-nodtk' can be used as a +workaround. If GNU CC is not installed, it is therefore recommended to +try + + ./configure CC="cc" + +and if that doesn't work, try + + ./configure CC="cc -nodtk" + + On Solaris, don't put '/usr/ucb' early in your 'PATH'. This +directory contains several dysfunctional programs; working variants of +these programs are available in '/usr/bin'. So, if you need '/usr/ucb' +in your 'PATH', put it _after_ '/usr/bin'. + + On Haiku, software installed for all users goes in '/boot/common', +not '/usr/local'. It is recommended to use the following options: + + ./configure --prefix=/boot/common + +Specifying the System Type +========================== + + There may be some features 'configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, 'configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +'--build=TYPE' option. TYPE can either be a short name for the system +type, such as 'sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS + KERNEL-OS + + See the file 'config.sub' for the possible values of each field. If +'config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option '--target=TYPE' to select the type of system they will +produce code for.
+ + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with '--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for 'configure' scripts to share, +you can create a site shell script called 'config.site' that gives +default values for variables like 'CC', 'cache_file', and 'prefix'. +'configure' looks for 'PREFIX/share/config.site' if it exists, then +'PREFIX/etc/config.site' if it exists. Or, you can set the +'CONFIG_SITE' environment variable to the location of the site script. +A warning: not all 'configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to 'configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the 'configure' command line, using 'VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified 'gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for 'CONFIG_SHELL' due to an +Autoconf limitation. Until the limitation is lifted, you can use this +workaround: + + CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash + +'configure' Invocation +====================== + + 'configure' recognizes the following options to control how it +operates. + +'--help' +'-h' + Print a summary of all of the options to 'configure', and exit. + +'--help=short' +'--help=recursive' + Print a summary of the options unique to this package's + 'configure', and exit. The 'short' variant lists options used only + in the top level, while the 'recursive' variant lists options also + present in any nested packages. 
+ +'--version' +'-V' + Print the version of Autoconf used to generate the 'configure' + script, and exit. + +'--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally 'config.cache'. FILE defaults to '/dev/null' to + disable caching. + +'--config-cache' +'-C' + Alias for '--cache-file=config.cache'. + +'--quiet' +'--silent' +'-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to '/dev/null' (any error + messages will still be shown). + +'--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + 'configure' can determine that directory automatically. + +'--prefix=DIR' + Use DIR as the installation prefix. *note Installation Names:: for + more details, including other options available for fine-tuning the + installation locations. + +'--no-create' +'-n' + Run the configure checks, but stop before creating any output + files. + +'configure' also accepts some other, not widely useful, options. Run +'configure --help' for more details. diff --git a/3rd/pcre2/LICENCE.md b/3rd/pcre2/LICENCE.md new file mode 100644 index 00000000..f58ceb75 --- /dev/null +++ b/3rd/pcre2/LICENCE.md @@ -0,0 +1,103 @@ +PCRE2 License +============= + +| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception | +|---------|-------| + +PCRE2 is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + +Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD" +licence, as specified below, with one exemption for certain binary +redistributions. The documentation for PCRE2, supplied in the "doc" directory, +is distributed under the same terms as the software itself. The data in the +testdata directory is not copyrighted and is in the public domain. + +The basic library functions are written in C and are freestanding. 
Also +included in the distribution is a just-in-time compiler that can be used to +optimize pattern matching. This is an optional feature that can be omitted when +the library is built. + + +COPYRIGHT +--------- + +### The basic library functions + + Written by: Philip Hazel + Email local part: Philip.Hazel + Email domain: gmail.com + + Retired from University of Cambridge Computing Service, + Cambridge, England. + + Copyright (c) 1997-2007 University of Cambridge + Copyright (c) 2007-2024 Philip Hazel + All rights reserved. + +### PCRE2 Just-In-Time compilation support + + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu + + Copyright (c) 2010-2024 Zoltan Herczeg + All rights reserved. + +### Stack-less Just-In-Time compiler + + Written by: Zoltan Herczeg + Email local part: hzmester + Email domain: freemail.hu + + Copyright (c) 2009-2024 Zoltan Herczeg + All rights reserved. + +### All other contributions + +Many other contributors have participated in the authorship of PCRE2. As PCRE2 +has never required a Contributor Licensing Agreement, or other copyright +assignment agreement, all contributions have copyright retained by each +original contributor or their employer. + + +THE "BSD" LICENCE +----------------- + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notices, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notices, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of the University of Cambridge nor the names of any + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + +EXEMPTION FOR BINARY LIBRARY-LIKE PACKAGES +------------------------------------------ + +The second condition in the BSD licence (covering binary redistributions) does +not apply all the way down a chain of software. If binary package A includes +PCRE2, it must respect the condition, but if package B is software that +includes package A, the condition is not imposed on package B unless it uses +PCRE2 independently. + +End diff --git a/3rd/pcre2/MODULE.bazel b/3rd/pcre2/MODULE.bazel new file mode 100644 index 00000000..8f4c0b46 --- /dev/null +++ b/3rd/pcre2/MODULE.bazel @@ -0,0 +1,9 @@ +module( + name = "pcre2", + version = "10.45", + compatibility_level = 1, +) + +bazel_dep(name = "rules_cc", version = "0.0.1") +bazel_dep(name = "bazel_skylib", version = "1.2.1") +bazel_dep(name = "platforms", version = "0.0.4") diff --git a/3rd/pcre2/Makefile.am b/3rd/pcre2/Makefile.am new file mode 100644 index 00000000..8aaa0cc5 --- /dev/null +++ b/3rd/pcre2/Makefile.am @@ -0,0 +1,978 @@ +## Process this file with automake to produce Makefile.in. + +AUTOMAKE_OPTIONS = subdir-objects +ACLOCAL_AMFLAGS = -I m4 + +## This seems to have become necessary for building in non-source directory. 
+ +AM_CPPFLAGS="-I$(srcdir)/src" + +## Specify the documentation files that are distributed. + +dist_doc_DATA = \ + AUTHORS.md \ + COPYING \ + ChangeLog \ + LICENCE.md \ + NEWS \ + README \ + SECURITY.md \ + doc/pcre2.txt \ + doc/pcre2-config.txt \ + doc/pcre2grep.txt \ + doc/pcre2test.txt + +dist_html_DATA = \ + doc/html/NON-AUTOTOOLS-BUILD.txt \ + doc/html/README.txt \ + doc/html/index.html \ + doc/html/pcre2-config.html \ + doc/html/pcre2.html \ + doc/html/pcre2_callout_enumerate.html \ + doc/html/pcre2_code_copy.html \ + doc/html/pcre2_code_copy_with_tables.html \ + doc/html/pcre2_code_free.html \ + doc/html/pcre2_compile.html \ + doc/html/pcre2_compile_context_copy.html \ + doc/html/pcre2_compile_context_create.html \ + doc/html/pcre2_compile_context_free.html \ + doc/html/pcre2_config.html \ + doc/html/pcre2_convert_context_copy.html \ + doc/html/pcre2_convert_context_create.html \ + doc/html/pcre2_convert_context_free.html \ + doc/html/pcre2_converted_pattern_free.html \ + doc/html/pcre2_dfa_match.html \ + doc/html/pcre2_general_context_copy.html \ + doc/html/pcre2_general_context_create.html \ + doc/html/pcre2_general_context_free.html \ + doc/html/pcre2_get_error_message.html \ + doc/html/pcre2_get_mark.html \ + doc/html/pcre2_get_match_data_heapframes_size.html \ + doc/html/pcre2_get_match_data_size.html \ + doc/html/pcre2_get_ovector_count.html \ + doc/html/pcre2_get_ovector_pointer.html \ + doc/html/pcre2_get_startchar.html \ + doc/html/pcre2_jit_compile.html \ + doc/html/pcre2_jit_free_unused_memory.html \ + doc/html/pcre2_jit_match.html \ + doc/html/pcre2_jit_stack_assign.html \ + doc/html/pcre2_jit_stack_create.html \ + doc/html/pcre2_jit_stack_free.html \ + doc/html/pcre2_maketables.html \ + doc/html/pcre2_maketables_free.html \ + doc/html/pcre2_match.html \ + doc/html/pcre2_match_context_copy.html \ + doc/html/pcre2_match_context_create.html \ + doc/html/pcre2_match_context_free.html \ + doc/html/pcre2_match_data_create.html \ + 
doc/html/pcre2_match_data_create_from_pattern.html \ + doc/html/pcre2_match_data_free.html \ + doc/html/pcre2_pattern_convert.html \ + doc/html/pcre2_pattern_info.html \ + doc/html/pcre2_serialize_decode.html \ + doc/html/pcre2_serialize_encode.html \ + doc/html/pcre2_serialize_free.html \ + doc/html/pcre2_serialize_get_number_of_codes.html \ + doc/html/pcre2_set_bsr.html \ + doc/html/pcre2_set_callout.html \ + doc/html/pcre2_set_character_tables.html \ + doc/html/pcre2_set_compile_extra_options.html \ + doc/html/pcre2_set_compile_recursion_guard.html \ + doc/html/pcre2_set_depth_limit.html \ + doc/html/pcre2_set_glob_escape.html \ + doc/html/pcre2_set_glob_separator.html \ + doc/html/pcre2_set_heap_limit.html \ + doc/html/pcre2_set_match_limit.html \ + doc/html/pcre2_set_max_pattern_compiled_length.html \ + doc/html/pcre2_set_max_pattern_length.html \ + doc/html/pcre2_set_max_varlookbehind.html \ + doc/html/pcre2_set_offset_limit.html \ + doc/html/pcre2_set_optimize.html \ + doc/html/pcre2_set_newline.html \ + doc/html/pcre2_set_parens_nest_limit.html \ + doc/html/pcre2_set_recursion_limit.html \ + doc/html/pcre2_set_recursion_memory_management.html \ + doc/html/pcre2_set_substitute_callout.html \ + doc/html/pcre2_set_substitute_case_callout.html \ + doc/html/pcre2_substitute.html \ + doc/html/pcre2_substring_copy_byname.html \ + doc/html/pcre2_substring_copy_bynumber.html \ + doc/html/pcre2_substring_free.html \ + doc/html/pcre2_substring_get_byname.html \ + doc/html/pcre2_substring_get_bynumber.html \ + doc/html/pcre2_substring_length_byname.html \ + doc/html/pcre2_substring_length_bynumber.html \ + doc/html/pcre2_substring_list_free.html \ + doc/html/pcre2_substring_list_get.html \ + doc/html/pcre2_substring_nametable_scan.html \ + doc/html/pcre2_substring_number_from_name.html \ + doc/html/pcre2api.html \ + doc/html/pcre2build.html \ + doc/html/pcre2callout.html \ + doc/html/pcre2compat.html \ + doc/html/pcre2convert.html \ + doc/html/pcre2demo.html \ + 
doc/html/pcre2grep.html \ + doc/html/pcre2jit.html \ + doc/html/pcre2limits.html \ + doc/html/pcre2matching.html \ + doc/html/pcre2partial.html \ + doc/html/pcre2pattern.html \ + doc/html/pcre2perform.html \ + doc/html/pcre2posix.html \ + doc/html/pcre2sample.html \ + doc/html/pcre2serialize.html \ + doc/html/pcre2syntax.html \ + doc/html/pcre2test.html \ + doc/html/pcre2unicode.html + +dist_man_MANS = \ + doc/pcre2-config.1 \ + doc/pcre2.3 \ + doc/pcre2_callout_enumerate.3 \ + doc/pcre2_code_copy.3 \ + doc/pcre2_code_copy_with_tables.3 \ + doc/pcre2_code_free.3 \ + doc/pcre2_compile.3 \ + doc/pcre2_compile_context_copy.3 \ + doc/pcre2_compile_context_create.3 \ + doc/pcre2_compile_context_free.3 \ + doc/pcre2_config.3 \ + doc/pcre2_convert_context_copy.3 \ + doc/pcre2_convert_context_create.3 \ + doc/pcre2_convert_context_free.3 \ + doc/pcre2_converted_pattern_free.3 \ + doc/pcre2_dfa_match.3 \ + doc/pcre2_general_context_copy.3 \ + doc/pcre2_general_context_create.3 \ + doc/pcre2_general_context_free.3 \ + doc/pcre2_get_error_message.3 \ + doc/pcre2_get_mark.3 \ + doc/pcre2_get_match_data_heapframes_size.3 \ + doc/pcre2_get_match_data_size.3 \ + doc/pcre2_get_ovector_count.3 \ + doc/pcre2_get_ovector_pointer.3 \ + doc/pcre2_get_startchar.3 \ + doc/pcre2_jit_compile.3 \ + doc/pcre2_jit_free_unused_memory.3 \ + doc/pcre2_jit_match.3 \ + doc/pcre2_jit_stack_assign.3 \ + doc/pcre2_jit_stack_create.3 \ + doc/pcre2_jit_stack_free.3 \ + doc/pcre2_maketables.3 \ + doc/pcre2_maketables_free.3 \ + doc/pcre2_match.3 \ + doc/pcre2_match_context_copy.3 \ + doc/pcre2_match_context_create.3 \ + doc/pcre2_match_context_free.3 \ + doc/pcre2_match_data_create.3 \ + doc/pcre2_match_data_create_from_pattern.3 \ + doc/pcre2_match_data_free.3 \ + doc/pcre2_pattern_convert.3 \ + doc/pcre2_pattern_info.3 \ + doc/pcre2_serialize_decode.3 \ + doc/pcre2_serialize_encode.3 \ + doc/pcre2_serialize_free.3 \ + doc/pcre2_serialize_get_number_of_codes.3 \ + doc/pcre2_set_bsr.3 \ + 
doc/pcre2_set_callout.3 \ + doc/pcre2_set_character_tables.3 \ + doc/pcre2_set_compile_extra_options.3 \ + doc/pcre2_set_compile_recursion_guard.3 \ + doc/pcre2_set_depth_limit.3 \ + doc/pcre2_set_glob_escape.3 \ + doc/pcre2_set_glob_separator.3 \ + doc/pcre2_set_heap_limit.3 \ + doc/pcre2_set_match_limit.3 \ + doc/pcre2_set_max_pattern_compiled_length.3 \ + doc/pcre2_set_max_pattern_length.3 \ + doc/pcre2_set_max_varlookbehind.3 \ + doc/pcre2_set_offset_limit.3 \ + doc/pcre2_set_optimize.3 \ + doc/pcre2_set_newline.3 \ + doc/pcre2_set_parens_nest_limit.3 \ + doc/pcre2_set_recursion_limit.3 \ + doc/pcre2_set_recursion_memory_management.3 \ + doc/pcre2_set_substitute_callout.3 \ + doc/pcre2_set_substitute_case_callout.3 \ + doc/pcre2_substitute.3 \ + doc/pcre2_substring_copy_byname.3 \ + doc/pcre2_substring_copy_bynumber.3 \ + doc/pcre2_substring_free.3 \ + doc/pcre2_substring_get_byname.3 \ + doc/pcre2_substring_get_bynumber.3 \ + doc/pcre2_substring_length_byname.3 \ + doc/pcre2_substring_length_bynumber.3 \ + doc/pcre2_substring_list_free.3 \ + doc/pcre2_substring_list_get.3 \ + doc/pcre2_substring_nametable_scan.3 \ + doc/pcre2_substring_number_from_name.3 \ + doc/pcre2api.3 \ + doc/pcre2build.3 \ + doc/pcre2callout.3 \ + doc/pcre2compat.3 \ + doc/pcre2convert.3 \ + doc/pcre2demo.3 \ + doc/pcre2grep.1 \ + doc/pcre2jit.3 \ + doc/pcre2limits.3 \ + doc/pcre2matching.3 \ + doc/pcre2partial.3 \ + doc/pcre2pattern.3 \ + doc/pcre2perform.3 \ + doc/pcre2posix.3 \ + doc/pcre2sample.3 \ + doc/pcre2serialize.3 \ + doc/pcre2syntax.3 \ + doc/pcre2test.1 \ + doc/pcre2unicode.3 + +# The Libtool libraries to install. We'll add to this later. + +lib_LTLIBRARIES = + +# Unit tests you want to run when people type 'make check'. 
+# TESTS is for binary unit tests, check_SCRIPTS for script-based tests + +TESTS = +check_SCRIPTS = +dist_noinst_SCRIPTS = + +# Some of the binaries we make are to be installed, and others are +# (non-user-visible) helper programs needed to build the libraries. + +bin_PROGRAMS = +noinst_PROGRAMS = + +# Additional files to delete on 'make clean', 'make distclean', +# and 'make maintainer-clean'. It turns out that the default is to delete only +# those binaries that *this* configuration has created. If the configuration +# has been changed, some binaries may not get automatically deleted. Therefore +# we list them here. + +CLEANFILES = \ + pcre2_dftables \ + pcre2_jit_test \ + pcre2fuzzcheck-8 \ + pcre2fuzzcheck-16 \ + pcre2fuzzcheck-32 \ + pcre2demo + +DISTCLEANFILES = src/config.h.in~ +MAINTAINERCLEANFILES = + +# Additional files to bundle with the distribution, over and above what +# the Autotools include by default. + +EXTRA_DIST = + +# These files contain additional m4 macros that are used by autoconf. + +EXTRA_DIST += \ + m4/ax_pthread.m4 m4/pcre2_visibility.m4 + +# These files contain maintenance information + +EXTRA_DIST += \ + NON-AUTOTOOLS-BUILD \ + HACKING + +# These are support files for building with Bazel or Zig + +EXTRA_DIST += \ + BUILD.bazel \ + MODULE.bazel \ + WORKSPACE.bazel \ + build.zig + +# These are support files for building under VMS + +EXTRA_DIST += \ + vms/configure.com \ + vms/openvms_readme.txt \ + vms/pcre2.h_patch \ + vms/stdint.h + +# These files are usable versions of pcre2.h and config.h that are distributed +# for the benefit of people who are building PCRE2 manually, without the +# Autotools support. + +EXTRA_DIST += \ + src/pcre2.h.generic \ + src/config.h.generic + +# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE +# version number. Therefore, we can create the generic version just by copying. 
+ +src/pcre2.h.generic: src/pcre2.h.in configure.ac + rm -f $@ + cp -p src/pcre2.h $@ + +# It is more complicated for config.h.generic. We need the version that results +# from a default configuration so as to get all the default values for PCRE +# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by +# doing a configure in a temporary directory (_generic). However, some trickery is needed, +# because the source directory may already be configured. If you just try +# running configure in a new directory, it complains. For this reason, we move +# config.status out of the way while doing the default configuration. A failing configure is tolerated as long as it got far enough to write src/config.h. The +# resulting config.h is munged by perl to put #ifndef/#endif guards round any #defines for +# macros with values (except those whose names start with PACKAGE or VERSION, which are left as they are), and to turn the #defines of boolean macros such as HAVE_xxx, +# SUPPORT_xxx and STDC_xxx into commented-out #undefs. We also get rid of any gcc-specific visibility settings, add a note before LT_OBJDIR, and squeeze runs of blank lines down to one. + +src/config.h.generic: configure.ac + rm -rf $@ _generic + mkdir _generic + cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside + cd _generic && $(abs_top_srcdir)/configure || : + cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs + test -f _generic/src/config.h + perl -n \ + -e 'BEGIN{$$blank=0;}' \ + -e 'if(/(.+?)\s*__attribute__ \(\(visibility/){print"$$1\n";$$blank=0;next;}' \ + -e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \ + -e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \ + -e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \ + -e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \ + _generic/src/config.h >$@ + rm -rf _generic + +MAINTAINERCLEANFILES += src/pcre2.h.generic src/config.h.generic + +# These are the header files we'll install. We do not distribute pcre2.h +# because it is generated from pcre2.h.in (hence the nodist_ prefix). + +nodist_include_HEADERS = src/pcre2.h +include_HEADERS = src/pcre2posix.h + +# This is the "config" script (pcre2-config), which is installed alongside the programs.
+ +bin_SCRIPTS = pcre2-config + +## --------------------------------------------------------------- +## The pcre2_dftables program is used to rebuild character tables before +## compiling PCRE2, if --enable-rebuild-chartables is specified. It is not an +## installed program. The default (when --enable-rebuild-chartables is not +## specified) is to link, using $(LN_S), to a distributed set of tables (src/pcre2_chartables.c.dist) that are defined for ASCII +## code. In this case, pcre2_dftables is not needed. Either way the result is src/pcre2_chartables.c, which is a built source and is not distributed. + +if WITH_REBUILD_CHARTABLES +noinst_PROGRAMS += pcre2_dftables +pcre2_dftables_SOURCES = src/pcre2_dftables.c +src/pcre2_chartables.c: pcre2_dftables$(EXEEXT) + rm -f $@ + ./pcre2_dftables$(EXEEXT) $@ +else +src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist + rm -f $@ + $(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c +endif # WITH_REBUILD_CHARTABLES + +BUILT_SOURCES = src/pcre2_chartables.c +NODIST_SOURCES = src/pcre2_chartables.c + +## Define the list of common sources, then arrange to build whichever of the +## 8-, 16-, or 32-bit libraries are configured. Each library compiles the same sources with its own -DPCRE2_CODE_UNIT_WIDTH setting.
+ +COMMON_SOURCES = \ + src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c \ + src/pcre2_compile.c \ + src/pcre2_compile.h \ + src/pcre2_compile_class.c \ + src/pcre2_config.c \ + src/pcre2_context.c \ + src/pcre2_convert.c \ + src/pcre2_dfa_match.c \ + src/pcre2_error.c \ + src/pcre2_extuni.c \ + src/pcre2_find_bracket.c \ + src/pcre2_internal.h \ + src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h \ + src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c \ + src/pcre2_match.c \ + src/pcre2_match_data.c \ + src/pcre2_newline.c \ + src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c \ + src/pcre2_script_run.c \ + src/pcre2_serialize.c \ + src/pcre2_string_utils.c \ + src/pcre2_study.c \ + src/pcre2_substitute.c \ + src/pcre2_substring.c \ + src/pcre2_tables.c \ + src/pcre2_ucd.c \ + src/pcre2_ucp.h \ + src/pcre2_util.h \ + src/pcre2_valid_utf.c \ + src/pcre2_xclass.c + +# The pcre2_ucptables.c file is #included by pcre2_tables.c, so it is distributed but not listed as a source of its own. + +EXTRA_DIST += src/pcre2_ucptables.c + +if WITH_PCRE2_8 +lib_LTLIBRARIES += libpcre2-8.la +libpcre2_8_la_SOURCES = \ + $(COMMON_SOURCES) +nodist_libpcre2_8_la_SOURCES = \ + $(NODIST_SOURCES) +libpcre2_8_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=8 \ + $(VISIBILITY_CFLAGS) \ + $(CET_CFLAGS) \ + $(AM_CFLAGS) +libpcre2_8_la_LIBADD = +endif # WITH_PCRE2_8 + +if WITH_PCRE2_16 +lib_LTLIBRARIES += libpcre2-16.la +libpcre2_16_la_SOURCES = \ + $(COMMON_SOURCES) +nodist_libpcre2_16_la_SOURCES = \ + $(NODIST_SOURCES) +libpcre2_16_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=16 \ + $(VISIBILITY_CFLAGS) \ + $(CET_CFLAGS) \ + $(AM_CFLAGS) +libpcre2_16_la_LIBADD = +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +lib_LTLIBRARIES += libpcre2-32.la +libpcre2_32_la_SOURCES = \ + $(COMMON_SOURCES) +nodist_libpcre2_32_la_SOURCES = \ + $(NODIST_SOURCES) +libpcre2_32_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=32 \ + $(VISIBILITY_CFLAGS) \ + $(CET_CFLAGS) \ + $(AM_CFLAGS) +libpcre2_32_la_LIBADD = +endif # WITH_PCRE2_32 + +#
The pcre2_chartables.c.dist file is the default version of +# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified. The src/pcre2_chartables.c made from it is removed by 'make clean'. + +EXTRA_DIST += src/pcre2_chartables.c.dist +CLEANFILES += src/pcre2_chartables.c + +# The JIT compiler lives in a separate directory (deps/sljit/sljit_src), but its files are #included +# when pcre2_jit_compile.c is processed, so they must be distributed. + +EXTRA_DIST += \ + deps/sljit/sljit_src/sljitConfig.h \ + deps/sljit/sljit_src/sljitConfigCPU.h \ + deps/sljit/sljit_src/sljitConfigInternal.h \ + deps/sljit/sljit_src/sljitLir.c \ + deps/sljit/sljit_src/sljitLir.h \ + deps/sljit/sljit_src/sljitNativeARM_32.c \ + deps/sljit/sljit_src/sljitNativeARM_64.c \ + deps/sljit/sljit_src/sljitNativeARM_T2_32.c \ + deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_32.c \ + deps/sljit/sljit_src/sljitNativeMIPS_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_common.c \ + deps/sljit/sljit_src/sljitNativePPC_32.c \ + deps/sljit/sljit_src/sljitNativePPC_64.c \ + deps/sljit/sljit_src/sljitNativePPC_common.c \ + deps/sljit/sljit_src/sljitNativeRISCV_32.c \ + deps/sljit/sljit_src/sljitNativeRISCV_64.c \ + deps/sljit/sljit_src/sljitNativeRISCV_common.c \ + deps/sljit/sljit_src/sljitNativeS390X.c \ + deps/sljit/sljit_src/sljitNativeX86_32.c \ + deps/sljit/sljit_src/sljitNativeX86_64.c \ + deps/sljit/sljit_src/sljitNativeX86_common.c \ + deps/sljit/sljit_src/sljitSerialize.c \ + deps/sljit/sljit_src/sljitUtils.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c
\ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c + +# Some of the JIT sources are also in separate files that are #included, so they are distributed the same way. + +EXTRA_DIST += \ + src/pcre2_jit_match.c \ + src/pcre2_jit_misc.c + +if WITH_PCRE2_8 +libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS) +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS) +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS) +endif # WITH_PCRE2_32 + +if WITH_VALGRIND +if WITH_PCRE2_8 +libpcre2_8_la_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +libpcre2_16_la_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +libpcre2_32_la_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_PCRE2_32 +endif # WITH_VALGRIND + +if WITH_GCOV +if WITH_PCRE2_8 +libpcre2_8_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +libpcre2_16_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +libpcre2_32_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_PCRE2_32 +endif # WITH_GCOV + +## A version of the 8-bit library that has a POSIX API.
+ +if WITH_PCRE2_8 +lib_LTLIBRARIES += libpcre2-posix.la +libpcre2_posix_la_SOURCES = src/pcre2posix.c +libpcre2_posix_la_CFLAGS = \ + -DPCRE2_CODE_UNIT_WIDTH=8 @PCRE2POSIX_CFLAG@ \ + $(VISIBILITY_CFLAGS) $(AM_CFLAGS) +libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS) +libpcre2_posix_la_LIBADD = libpcre2-8.la +if WITH_GCOV +libpcre2_posix_la_CFLAGS += $(GCOV_CFLAGS) +endif # WITH_GCOV +endif # WITH_PCRE2_8 + +## Build pcre2grep if the 8-bit library is enabled; the optional fuzzer stuff follows in its own WITH_FUZZ_SUPPORT conditional + +if WITH_PCRE2_8 +bin_PROGRAMS += pcre2grep +pcre2grep_SOURCES = src/pcre2grep.c +pcre2grep_CFLAGS = $(AM_CFLAGS) +pcre2grep_LDADD = $(LIBZ) $(LIBBZ2) +pcre2grep_LDADD += libpcre2-8.la +if WITH_GCOV +pcre2grep_CFLAGS += $(GCOV_CFLAGS) +pcre2grep_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_8 + +## If fuzzer support is enabled, build a non-installed (noinst_) static library containing the +## fuzzing function for each enabled code unit width. Also build the standalone checking binary from the same +## source but using -DSTANDALONE, linked against the library of the matching width.
+ +if WITH_FUZZ_SUPPORT +noinst_LIBRARIES = +if WITH_PCRE2_8 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport.a +_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +_libs_libpcre2_fuzzsupport_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-8 +pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_8_CFLAGS = -DSTANDALONE $(AM_CFLAGS) +pcre2fuzzcheck_8_LDADD = libpcre2-8.la +if WITH_GCOV +pcre2fuzzcheck_8_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_8_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_8 + +if WITH_PCRE2_16 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-16.a +_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +_libs_libpcre2_fuzzsupport_16_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-16 +pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_16_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +pcre2fuzzcheck_16_LDADD = libpcre2-16.la +if WITH_GCOV +pcre2fuzzcheck_16_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_16_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-32.a +_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +_libs_libpcre2_fuzzsupport_32_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-32 +pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_32_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +pcre2fuzzcheck_32_LDADD = libpcre2-32.la +if WITH_GCOV +pcre2fuzzcheck_32_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_32_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_32 + +endif # WITH_FUZZ_SUPPORT + +## -------- Testing ---------- + +## If the 8-bit library is enabled, build the POSIX wrapper test program and +## arrange for it to run as one of the TESTS.
+ +if WITH_PCRE2_8 +TESTS += pcre2posix_test +noinst_PROGRAMS += pcre2posix_test +pcre2posix_test_SOURCES = src/pcre2posix_test.c +pcre2posix_test_CFLAGS = $(AM_CFLAGS) @PCRE2POSIX_CFLAG@ +pcre2posix_test_LDADD = libpcre2-posix.la libpcre2-8.la +endif # WITH_PCRE2_8 + +## If JIT support is enabled, arrange for the JIT test program to run. It is linked against every library width that is enabled. + +if WITH_JIT +TESTS += pcre2_jit_test +noinst_PROGRAMS += pcre2_jit_test +pcre2_jit_test_SOURCES = src/pcre2_jit_test.c +pcre2_jit_test_CFLAGS = $(AM_CFLAGS) +pcre2_jit_test_LDADD = +if WITH_PCRE2_8 +pcre2_jit_test_LDADD += libpcre2-8.la +endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +pcre2_jit_test_LDADD += libpcre2-16.la +endif # WITH_PCRE2_16 +if WITH_PCRE2_32 +pcre2_jit_test_LDADD += libpcre2-32.la +endif # WITH_PCRE2_32 +if WITH_GCOV +pcre2_jit_test_CFLAGS += $(GCOV_CFLAGS) +pcre2_jit_test_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_JIT + +# Build the general pcre2test program. The file src/pcre2_printint.c is +# #included by pcre2test as many times as needed, at different code unit +# widths. pcre2test is linked against $(LIBREADLINE) and every enabled library (plus libpcre2-posix when the 8-bit library is enabled). + +bin_PROGRAMS += pcre2test +EXTRA_DIST += src/pcre2_printint.c +pcre2test_SOURCES = src/pcre2test.c +pcre2test_CFLAGS = $(AM_CFLAGS) +pcre2test_LDADD = $(LIBREADLINE) + +if WITH_PCRE2_8 +pcre2test_LDADD += libpcre2-8.la libpcre2-posix.la +endif # WITH_PCRE2_8 + +if WITH_PCRE2_16 +pcre2test_LDADD += libpcre2-16.la +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +pcre2test_LDADD += libpcre2-32.la +endif # WITH_PCRE2_32 + +if WITH_VALGRIND +pcre2test_CFLAGS += $(VALGRIND_CFLAGS) +endif # WITH_VALGRIND + +if WITH_GCOV +pcre2test_CFLAGS += $(GCOV_CFLAGS) +pcre2test_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV + +## The main library tests. Each test is a binary plus a script that runs that +## binary in various ways. We install these test binaries in case folks find them +## helpful. The two .bat files are for running the tests under Windows.
+ +TESTS += RunTest +EXTRA_DIST += RunTest.bat +dist_noinst_SCRIPTS += RunTest + +## When the 8-bit library is configured, pcre2grep will have been built, so its test script is run as well. + +if WITH_PCRE2_8 +TESTS += RunGrepTest +EXTRA_DIST += RunGrepTest.bat +dist_noinst_SCRIPTS += RunGrepTest +endif # WITH_PCRE2_8 + +## Distribute all the test data files, together with perltest.sh. + +EXTRA_DIST += \ + testdata/grepbinary \ + testdata/grepfilelist \ + testdata/grepinput \ + testdata/grepinput3 \ + testdata/grepinput8 \ + testdata/grepinputBad8 \ + testdata/grepinputBad8_Trail \ + testdata/grepinputC.bz2 \ + testdata/grepinputC.gz \ + testdata/grepinputM \ + testdata/grepinputUN \ + testdata/grepinputv \ + testdata/grepinputx \ + testdata/greplist \ + testdata/grepnot.bz2 \ + testdata/grepoutput \ + testdata/grepoutput8 \ + testdata/grepoutputC \ + testdata/grepoutputCN \ + testdata/grepoutputCNU \ + testdata/grepoutputCU \ + testdata/grepoutputCbz2 \ + testdata/grepoutputCgz \ + testdata/grepoutputN \ + testdata/grepoutputUN \ + testdata/greppatN4 \ + testdata/testbtables \ + testdata/testinput1 \ + testdata/testinput2 \ + testdata/testinput3 \ + testdata/testinput4 \ + testdata/testinput5 \ + testdata/testinput6 \ + testdata/testinput7 \ + testdata/testinput8 \ + testdata/testinput9 \ + testdata/testinput10 \ + testdata/testinput11 \ + testdata/testinput12 \ + testdata/testinput13 \ + testdata/testinput14 \ + testdata/testinput15 \ + testdata/testinput16 \ + testdata/testinput17 \ + testdata/testinput18 \ + testdata/testinput19 \ + testdata/testinput20 \ + testdata/testinput21 \ + testdata/testinput22 \ + testdata/testinput23 \ + testdata/testinput24 \ + testdata/testinput25 \ + testdata/testinput26 \ + testdata/testinput27 \ + testdata/testinputEBC \ + testdata/testinputheap \ + testdata/testoutput1 \ + testdata/testoutput2 \ + testdata/testoutput3 \ + testdata/testoutput3A \ + testdata/testoutput3B \ + testdata/testoutput4 \ + testdata/testoutput5 \ + testdata/testoutput6 \ + testdata/testoutput7 \ +
testdata/testoutput8-16-2 \ + testdata/testoutput8-16-3 \ + testdata/testoutput8-16-4 \ + testdata/testoutput8-32-2 \ + testdata/testoutput8-32-3 \ + testdata/testoutput8-32-4 \ + testdata/testoutput8-8-2 \ + testdata/testoutput8-8-3 \ + testdata/testoutput8-8-4 \ + testdata/testoutput9 \ + testdata/testoutput10 \ + testdata/testoutput11-16 \ + testdata/testoutput11-32 \ + testdata/testoutput12-16 \ + testdata/testoutput12-32 \ + testdata/testoutput13 \ + testdata/testoutput14-16 \ + testdata/testoutput14-32 \ + testdata/testoutput14-8 \ + testdata/testoutput15 \ + testdata/testoutput16 \ + testdata/testoutput17 \ + testdata/testoutput18 \ + testdata/testoutput19 \ + testdata/testoutput20 \ + testdata/testoutput21 \ + testdata/testoutput22-16 \ + testdata/testoutput22-32 \ + testdata/testoutput22-8 \ + testdata/testoutput23 \ + testdata/testoutput24 \ + testdata/testoutput25 \ + testdata/testoutput26 \ + testdata/testoutput27 \ + testdata/testoutputEBC \ + testdata/testoutputheap-16 \ + testdata/testoutputheap-32 \ + testdata/testoutputheap-8 \ + testdata/valgrind-jit.supp \ + testdata/wintestinput3 \ + testdata/wintestoutput3 \ + perltest.sh + +# RunTest and RunGrepTest should clean up after themselves, but just in case +# they don't, add their working files to CLEANFILES. + +CLEANFILES += \ + testSinput \ + test3input \ + test3output \ + test3outputA \ + test3outputB \ + testtry \ + teststdout \ + teststderr \ + teststderrgrep \ + testtemp1grep \ + testtemp2grep \ + testtrygrep \ + testNinputgrep + +## ------------ End of testing ------------- + + +# PCRE2 demonstration program. Not built automatically. The point is that the +# users should build it themselves. So just distribute the source. + +EXTRA_DIST += src/pcre2demo.c + + +# We have .pc files for pkg-config users, one for each library that is enabled.
+ +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = + +if WITH_PCRE2_8 +pkgconfig_DATA += libpcre2-8.pc libpcre2-posix.pc +endif + +if WITH_PCRE2_16 +pkgconfig_DATA += libpcre2-16.pc +endif + +if WITH_PCRE2_32 +pkgconfig_DATA += libpcre2-32.pc +endif + + +# gcov/lcov code coverage reporting +# +# Coverage reporting targets: +# +# coverage: Create a coverage report from 'make check' (depends on coverage-reset, coverage-baseline, coverage-check, which runs 'make -k check' and ignores its failure, and coverage-report) +# coverage-baseline: Capture baseline coverage information +# coverage-reset: This zeros the coverage counters only +# coverage-report: This creates the coverage report only +# coverage-clean-report: This removes the generated coverage report +# without cleaning the coverage data itself +# coverage-clean-data: This removes the captured coverage data (*.gcda) without +# removing the coverage files created at compile time (*.gcno) +# coverage-clean: This cleans all coverage data including the generated +# coverage report and the *.gcno files; it is hooked into 'make clean'. + +if WITH_GCOV +COVERAGE_TEST_NAME = $(PACKAGE) +COVERAGE_NAME = $(PACKAGE)-$(VERSION) +COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info +COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage +COVERAGE_LCOV_EXTRA_FLAGS = +COVERAGE_GENHTML_EXTRA_FLAGS = + +coverage_quiet = $(coverage_quiet_$(V)) +coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY)) +coverage_quiet_0 = --quiet + +coverage-check: all + -$(MAKE) $(AM_MAKEFLAGS) -k check + +coverage-baseline: + $(LCOV) $(coverage_quiet) \ + --directory $(top_builddir) \ + --output-file "$(COVERAGE_OUTPUT_FILE)" \ + --capture \ + --initial + +coverage-report: + $(LCOV) $(coverage_quiet) \ + --directory $(top_builddir) \ + --capture \ + --output-file "$(COVERAGE_OUTPUT_FILE).tmp" \ + --test-name "$(COVERAGE_TEST_NAME)" \ + --no-checksum \ + --compat-libtool \ + $(COVERAGE_LCOV_EXTRA_FLAGS) + $(LCOV) $(coverage_quiet) \ + --directory $(top_builddir) \ + --output-file "$(COVERAGE_OUTPUT_FILE)" \ + --remove "$(COVERAGE_OUTPUT_FILE).tmp" \ + "/tmp/*" \ + "/usr/include/*" \ + "$(includedir)/*" + -@rm -f
"$(COVERAGE_OUTPUT_FILE).tmp" + LANG=C $(GENHTML) $(coverage_quiet) \ + --prefix $(top_builddir) \ + --output-directory "$(COVERAGE_OUTPUT_DIR)" \ + --title "$(PACKAGE) $(VERSION) Code Coverage Report" \ + --show-details "$(COVERAGE_OUTPUT_FILE)" \ + --legend \ + $(COVERAGE_GENHTML_EXTRA_FLAGS) + @echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html" + +coverage-reset: + -$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir) + +coverage-clean-report: + -rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp" + -rm -rf "$(COVERAGE_OUTPUT_DIR)" + +coverage-clean-data: + -find $(top_builddir) -name "*.gcda" -delete + +coverage-clean: coverage-reset coverage-clean-report coverage-clean-data + -find $(top_builddir) -name "*.gcno" -delete + +coverage-distclean: coverage-clean + +coverage: coverage-reset coverage-baseline coverage-check coverage-report +clean-local: coverage-clean +distclean-local: coverage-distclean + +.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean + +# Without coverage support, still arrange for 'make distclean' to get rid of +# any coverage files that may have been left from a different configuration. In that case 'make coverage' only prints a hint. + +else + +coverage: + @echo "Configuring with --enable-coverage is required to generate code coverage report."
+ +DISTCLEANFILES += src/*.gcda src/*.gcno + +distclean-local: + rm -rf $(PACKAGE)-$(VERSION)-coverage* + +endif # WITH_GCOV + +## CMake support + +EXTRA_DIST += \ + cmake/COPYING-CMAKE-SCRIPTS \ + cmake/FindEditline.cmake \ + cmake/FindReadline.cmake \ + cmake/pcre2-config-version.cmake.in \ + cmake/pcre2-config.cmake.in \ + CMakeLists.txt \ + config-cmake.h.in + +## end Makefile.am diff --git a/3rd/pcre2/Makefile.in b/3rd/pcre2/Makefile.in new file mode 100644 index 00000000..5deb080a --- /dev/null +++ b/3rd/pcre2/Makefile.in @@ -0,0 +1,4029 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + + + + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +TESTS = $(am__EXEEXT_6) $(am__EXEEXT_7) RunTest $(am__append_43) +bin_PROGRAMS = $(am__EXEEXT_1) pcre2test$(EXEEXT) +noinst_PROGRAMS = $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ + $(am__EXEEXT_5) 
$(am__EXEEXT_6) $(am__EXEEXT_7) +@WITH_REBUILD_CHARTABLES_TRUE@am__append_1 = pcre2_dftables +@WITH_PCRE2_8_TRUE@am__append_2 = libpcre2-8.la +@WITH_PCRE2_16_TRUE@am__append_3 = libpcre2-16.la +@WITH_PCRE2_32_TRUE@am__append_4 = libpcre2-32.la +@WITH_PCRE2_8_TRUE@@WITH_VALGRIND_TRUE@am__append_5 = $(VALGRIND_CFLAGS) +@WITH_PCRE2_16_TRUE@@WITH_VALGRIND_TRUE@am__append_6 = $(VALGRIND_CFLAGS) +@WITH_PCRE2_32_TRUE@@WITH_VALGRIND_TRUE@am__append_7 = $(VALGRIND_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_8 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__append_9 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__append_10 = $(GCOV_CFLAGS) +@WITH_PCRE2_8_TRUE@am__append_11 = libpcre2-posix.la +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_12 = $(GCOV_CFLAGS) +@WITH_PCRE2_8_TRUE@am__append_13 = pcre2grep +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_14 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_15 = $(GCOV_LIBS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__append_16 = .libs/libpcre2-fuzzsupport.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__append_17 = pcre2fuzzcheck-8 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_18 = $(GCOV_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__append_19 = $(GCOV_LIBS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__append_20 = .libs/libpcre2-fuzzsupport-16.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__append_21 = pcre2fuzzcheck-16 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__append_22 = $(GCOV_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__append_23 = $(GCOV_LIBS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__append_24 = .libs/libpcre2-fuzzsupport-32.a +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__append_25 = pcre2fuzzcheck-32 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__append_26 = $(GCOV_CFLAGS) 
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__append_27 = $(GCOV_LIBS) +@WITH_PCRE2_8_TRUE@am__append_28 = pcre2posix_test +@WITH_PCRE2_8_TRUE@am__append_29 = pcre2posix_test +@WITH_JIT_TRUE@am__append_30 = pcre2_jit_test +@WITH_JIT_TRUE@am__append_31 = pcre2_jit_test +@WITH_JIT_TRUE@@WITH_PCRE2_8_TRUE@am__append_32 = libpcre2-8.la +@WITH_JIT_TRUE@@WITH_PCRE2_16_TRUE@am__append_33 = libpcre2-16.la +@WITH_JIT_TRUE@@WITH_PCRE2_32_TRUE@am__append_34 = libpcre2-32.la +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_35 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__append_36 = $(GCOV_LIBS) +@WITH_PCRE2_8_TRUE@am__append_37 = libpcre2-8.la libpcre2-posix.la +@WITH_PCRE2_16_TRUE@am__append_38 = libpcre2-16.la +@WITH_PCRE2_32_TRUE@am__append_39 = libpcre2-32.la +@WITH_VALGRIND_TRUE@am__append_40 = $(VALGRIND_CFLAGS) +@WITH_GCOV_TRUE@am__append_41 = $(GCOV_CFLAGS) +@WITH_GCOV_TRUE@am__append_42 = $(GCOV_LIBS) +@WITH_PCRE2_8_TRUE@am__append_43 = RunGrepTest +@WITH_PCRE2_8_TRUE@am__append_44 = RunGrepTest.bat +@WITH_PCRE2_8_TRUE@am__append_45 = RunGrepTest +@WITH_PCRE2_8_TRUE@am__append_46 = libpcre2-8.pc libpcre2-posix.pc +@WITH_PCRE2_16_TRUE@am__append_47 = libpcre2-16.pc +@WITH_PCRE2_32_TRUE@am__append_48 = libpcre2-32.pc +@WITH_GCOV_FALSE@am__append_49 = src/*.gcda src/*.gcno +subdir = . 
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/pcre2_visibility.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ + $(am__configure_deps) $(am__dist_noinst_SCRIPTS_DIST) \ + $(dist_doc_DATA) $(dist_html_DATA) $(include_HEADERS) \ + $(am__DIST_COMMON) +am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/config.h +CONFIG_CLEAN_FILES = libpcre2-8.pc libpcre2-16.pc libpcre2-32.pc \ + libpcre2-posix.pc pcre2-config src/pcre2.h +CONFIG_CLEAN_VPATH_FILES = +@WITH_PCRE2_8_TRUE@am__EXEEXT_1 = pcre2grep$(EXEEXT) +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \ + "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" \ + "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" \ + "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" \ + "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" +@WITH_REBUILD_CHARTABLES_TRUE@am__EXEEXT_2 = pcre2_dftables$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__EXEEXT_3 = pcre2fuzzcheck-8$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__EXEEXT_4 = pcre2fuzzcheck-16$(EXEEXT) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__EXEEXT_5 = pcre2fuzzcheck-32$(EXEEXT) +@WITH_PCRE2_8_TRUE@am__EXEEXT_6 = pcre2posix_test$(EXEEXT) +@WITH_JIT_TRUE@am__EXEEXT_7 = pcre2_jit_test$(EXEEXT) +PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) +LIBRARIES = $(noinst_LIBRARIES) +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = 
f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +LTLIBRARIES = $(lib_LTLIBRARIES) +ARFLAGS = cru +AM_V_AR = $(am__v_AR_@AM_V@) +am__v_AR_ = $(am__v_AR_@AM_DEFAULT_V@) +am__v_AR_0 = @echo " AR " $@; +am__v_AR_1 = +_libs_libpcre2_fuzzsupport_16_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_16_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_16_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +am__dirstamp = $(am__leading_dot)dirstamp +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am__libs_libpcre2_fuzzsupport_16_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_16_a_OBJECTS = \ + $(am__libs_libpcre2_fuzzsupport_16_a_OBJECTS) +_libs_libpcre2_fuzzsupport_32_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_32_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_32_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am__libs_libpcre2_fuzzsupport_32_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_32_a_OBJECTS = \ + 
$(am__libs_libpcre2_fuzzsupport_32_a_OBJECTS) +_libs_libpcre2_fuzzsupport_a_AR = $(AR) $(ARFLAGS) +_libs_libpcre2_fuzzsupport_a_DEPENDENCIES = +am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST = \ + src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am__libs_libpcre2_fuzzsupport_a_OBJECTS = src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT) +_libs_libpcre2_fuzzsupport_a_OBJECTS = \ + $(am__libs_libpcre2_fuzzsupport_a_OBJECTS) +libpcre2_16_la_DEPENDENCIES = +am__libpcre2_16_la_SOURCES_DIST = src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ + src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ + src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ + src/pcre2_internal.h src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c +am__objects_1 = src/libpcre2_16_la-pcre2_auto_possess.lo \ + src/libpcre2_16_la-pcre2_chkdint.lo \ + src/libpcre2_16_la-pcre2_compile.lo \ + src/libpcre2_16_la-pcre2_compile_class.lo \ + src/libpcre2_16_la-pcre2_config.lo \ + src/libpcre2_16_la-pcre2_context.lo \ + src/libpcre2_16_la-pcre2_convert.lo \ + src/libpcre2_16_la-pcre2_dfa_match.lo \ + src/libpcre2_16_la-pcre2_error.lo \ + src/libpcre2_16_la-pcre2_extuni.lo \ + src/libpcre2_16_la-pcre2_find_bracket.lo \ + src/libpcre2_16_la-pcre2_jit_compile.lo \ + src/libpcre2_16_la-pcre2_maketables.lo \ + src/libpcre2_16_la-pcre2_match.lo \ + src/libpcre2_16_la-pcre2_match_data.lo \ + 
src/libpcre2_16_la-pcre2_newline.lo \ + src/libpcre2_16_la-pcre2_ord2utf.lo \ + src/libpcre2_16_la-pcre2_pattern_info.lo \ + src/libpcre2_16_la-pcre2_script_run.lo \ + src/libpcre2_16_la-pcre2_serialize.lo \ + src/libpcre2_16_la-pcre2_string_utils.lo \ + src/libpcre2_16_la-pcre2_study.lo \ + src/libpcre2_16_la-pcre2_substitute.lo \ + src/libpcre2_16_la-pcre2_substring.lo \ + src/libpcre2_16_la-pcre2_tables.lo \ + src/libpcre2_16_la-pcre2_ucd.lo \ + src/libpcre2_16_la-pcre2_valid_utf.lo \ + src/libpcre2_16_la-pcre2_xclass.lo +@WITH_PCRE2_16_TRUE@am_libpcre2_16_la_OBJECTS = $(am__objects_1) +am__objects_2 = src/libpcre2_16_la-pcre2_chartables.lo +@WITH_PCRE2_16_TRUE@nodist_libpcre2_16_la_OBJECTS = $(am__objects_2) +libpcre2_16_la_OBJECTS = $(am_libpcre2_16_la_OBJECTS) \ + $(nodist_libpcre2_16_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libpcre2_16_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpcre2_16_la_CFLAGS) $(CFLAGS) $(libpcre2_16_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@WITH_PCRE2_16_TRUE@am_libpcre2_16_la_rpath = -rpath $(libdir) +libpcre2_32_la_DEPENDENCIES = +am__libpcre2_32_la_SOURCES_DIST = src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ + src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ + src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ + src/pcre2_internal.h src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_tables.c src/pcre2_ucd.c 
src/pcre2_ucp.h \ + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c +am__objects_3 = src/libpcre2_32_la-pcre2_auto_possess.lo \ + src/libpcre2_32_la-pcre2_chkdint.lo \ + src/libpcre2_32_la-pcre2_compile.lo \ + src/libpcre2_32_la-pcre2_compile_class.lo \ + src/libpcre2_32_la-pcre2_config.lo \ + src/libpcre2_32_la-pcre2_context.lo \ + src/libpcre2_32_la-pcre2_convert.lo \ + src/libpcre2_32_la-pcre2_dfa_match.lo \ + src/libpcre2_32_la-pcre2_error.lo \ + src/libpcre2_32_la-pcre2_extuni.lo \ + src/libpcre2_32_la-pcre2_find_bracket.lo \ + src/libpcre2_32_la-pcre2_jit_compile.lo \ + src/libpcre2_32_la-pcre2_maketables.lo \ + src/libpcre2_32_la-pcre2_match.lo \ + src/libpcre2_32_la-pcre2_match_data.lo \ + src/libpcre2_32_la-pcre2_newline.lo \ + src/libpcre2_32_la-pcre2_ord2utf.lo \ + src/libpcre2_32_la-pcre2_pattern_info.lo \ + src/libpcre2_32_la-pcre2_script_run.lo \ + src/libpcre2_32_la-pcre2_serialize.lo \ + src/libpcre2_32_la-pcre2_string_utils.lo \ + src/libpcre2_32_la-pcre2_study.lo \ + src/libpcre2_32_la-pcre2_substitute.lo \ + src/libpcre2_32_la-pcre2_substring.lo \ + src/libpcre2_32_la-pcre2_tables.lo \ + src/libpcre2_32_la-pcre2_ucd.lo \ + src/libpcre2_32_la-pcre2_valid_utf.lo \ + src/libpcre2_32_la-pcre2_xclass.lo +@WITH_PCRE2_32_TRUE@am_libpcre2_32_la_OBJECTS = $(am__objects_3) +am__objects_4 = src/libpcre2_32_la-pcre2_chartables.lo +@WITH_PCRE2_32_TRUE@nodist_libpcre2_32_la_OBJECTS = $(am__objects_4) +libpcre2_32_la_OBJECTS = $(am_libpcre2_32_la_OBJECTS) \ + $(nodist_libpcre2_32_la_OBJECTS) +libpcre2_32_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpcre2_32_la_CFLAGS) $(CFLAGS) $(libpcre2_32_la_LDFLAGS) \ + $(LDFLAGS) -o $@ +@WITH_PCRE2_32_TRUE@am_libpcre2_32_la_rpath = -rpath $(libdir) +libpcre2_8_la_DEPENDENCIES = +am__libpcre2_8_la_SOURCES_DIST = src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c src/pcre2_compile.c src/pcre2_compile.h \ + src/pcre2_compile_class.c src/pcre2_config.c \ + 
src/pcre2_context.c src/pcre2_convert.c src/pcre2_dfa_match.c \ + src/pcre2_error.c src/pcre2_extuni.c src/pcre2_find_bracket.c \ + src/pcre2_internal.h src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c src/pcre2_match.c \ + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c src/pcre2_script_run.c \ + src/pcre2_serialize.c src/pcre2_string_utils.c \ + src/pcre2_study.c src/pcre2_substitute.c src/pcre2_substring.c \ + src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_ucp.h \ + src/pcre2_util.h src/pcre2_valid_utf.c src/pcre2_xclass.c +am__objects_5 = src/libpcre2_8_la-pcre2_auto_possess.lo \ + src/libpcre2_8_la-pcre2_chkdint.lo \ + src/libpcre2_8_la-pcre2_compile.lo \ + src/libpcre2_8_la-pcre2_compile_class.lo \ + src/libpcre2_8_la-pcre2_config.lo \ + src/libpcre2_8_la-pcre2_context.lo \ + src/libpcre2_8_la-pcre2_convert.lo \ + src/libpcre2_8_la-pcre2_dfa_match.lo \ + src/libpcre2_8_la-pcre2_error.lo \ + src/libpcre2_8_la-pcre2_extuni.lo \ + src/libpcre2_8_la-pcre2_find_bracket.lo \ + src/libpcre2_8_la-pcre2_jit_compile.lo \ + src/libpcre2_8_la-pcre2_maketables.lo \ + src/libpcre2_8_la-pcre2_match.lo \ + src/libpcre2_8_la-pcre2_match_data.lo \ + src/libpcre2_8_la-pcre2_newline.lo \ + src/libpcre2_8_la-pcre2_ord2utf.lo \ + src/libpcre2_8_la-pcre2_pattern_info.lo \ + src/libpcre2_8_la-pcre2_script_run.lo \ + src/libpcre2_8_la-pcre2_serialize.lo \ + src/libpcre2_8_la-pcre2_string_utils.lo \ + src/libpcre2_8_la-pcre2_study.lo \ + src/libpcre2_8_la-pcre2_substitute.lo \ + src/libpcre2_8_la-pcre2_substring.lo \ + src/libpcre2_8_la-pcre2_tables.lo \ + src/libpcre2_8_la-pcre2_ucd.lo \ + src/libpcre2_8_la-pcre2_valid_utf.lo \ + src/libpcre2_8_la-pcre2_xclass.lo +@WITH_PCRE2_8_TRUE@am_libpcre2_8_la_OBJECTS = $(am__objects_5) +am__objects_6 = src/libpcre2_8_la-pcre2_chartables.lo +@WITH_PCRE2_8_TRUE@nodist_libpcre2_8_la_OBJECTS = 
$(am__objects_6) +libpcre2_8_la_OBJECTS = $(am_libpcre2_8_la_OBJECTS) \ + $(nodist_libpcre2_8_la_OBJECTS) +libpcre2_8_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libpcre2_8_la_CFLAGS) \ + $(CFLAGS) $(libpcre2_8_la_LDFLAGS) $(LDFLAGS) -o $@ +@WITH_PCRE2_8_TRUE@am_libpcre2_8_la_rpath = -rpath $(libdir) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_DEPENDENCIES = libpcre2-8.la +am__libpcre2_posix_la_SOURCES_DIST = src/pcre2posix.c +@WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_OBJECTS = \ +@WITH_PCRE2_8_TRUE@ src/libpcre2_posix_la-pcre2posix.lo +libpcre2_posix_la_OBJECTS = $(am_libpcre2_posix_la_OBJECTS) +libpcre2_posix_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(libpcre2_posix_la_CFLAGS) $(CFLAGS) \ + $(libpcre2_posix_la_LDFLAGS) $(LDFLAGS) -o $@ +@WITH_PCRE2_8_TRUE@am_libpcre2_posix_la_rpath = -rpath $(libdir) +am__pcre2_dftables_SOURCES_DIST = src/pcre2_dftables.c +@WITH_REBUILD_CHARTABLES_TRUE@am_pcre2_dftables_OBJECTS = \ +@WITH_REBUILD_CHARTABLES_TRUE@ src/pcre2_dftables.$(OBJEXT) +pcre2_dftables_OBJECTS = $(am_pcre2_dftables_OBJECTS) +pcre2_dftables_LDADD = $(LDADD) +am__pcre2_jit_test_SOURCES_DIST = src/pcre2_jit_test.c +@WITH_JIT_TRUE@am_pcre2_jit_test_OBJECTS = \ +@WITH_JIT_TRUE@ src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT) +pcre2_jit_test_OBJECTS = $(am_pcre2_jit_test_OBJECTS) +am__DEPENDENCIES_1 = +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@am__DEPENDENCIES_2 = \ +@WITH_GCOV_TRUE@@WITH_JIT_TRUE@ $(am__DEPENDENCIES_1) +@WITH_JIT_TRUE@pcre2_jit_test_DEPENDENCIES = $(am__append_32) \ +@WITH_JIT_TRUE@ $(am__append_33) $(am__append_34) \ +@WITH_JIT_TRUE@ $(am__DEPENDENCIES_2) +pcre2_jit_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2_jit_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o \ + $@ +am__pcre2fuzzcheck_16_SOURCES_DIST = src/pcre2_fuzzsupport.c 
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@am_pcre2fuzzcheck_16_OBJECTS = src/pcre2fuzzcheck_16-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_16_OBJECTS = $(am_pcre2fuzzcheck_16_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_16_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_DEPENDENCIES = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ libpcre2-16.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(am__DEPENDENCIES_3) +pcre2fuzzcheck_16_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__pcre2fuzzcheck_32_SOURCES_DIST = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@am_pcre2fuzzcheck_32_OBJECTS = src/pcre2fuzzcheck_32-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_32_OBJECTS = $(am_pcre2fuzzcheck_32_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_32_TRUE@am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_DEPENDENCIES = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ libpcre2-32.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(am__DEPENDENCIES_4) +pcre2fuzzcheck_32_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__pcre2fuzzcheck_8_SOURCES_DIST = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@am_pcre2fuzzcheck_8_OBJECTS = src/pcre2fuzzcheck_8-pcre2_fuzzsupport.$(OBJEXT) +pcre2fuzzcheck_8_OBJECTS = $(am_pcre2fuzzcheck_8_OBJECTS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_5 = $(am__DEPENDENCIES_1) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_DEPENDENCIES = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ 
$(am__DEPENDENCIES_5) +pcre2fuzzcheck_8_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am__pcre2grep_SOURCES_DIST = src/pcre2grep.c +@WITH_PCRE2_8_TRUE@am_pcre2grep_OBJECTS = \ +@WITH_PCRE2_8_TRUE@ src/pcre2grep-pcre2grep.$(OBJEXT) +pcre2grep_OBJECTS = $(am_pcre2grep_OBJECTS) +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@am__DEPENDENCIES_6 = \ +@WITH_GCOV_TRUE@@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_1) +@WITH_PCRE2_8_TRUE@pcre2grep_DEPENDENCIES = $(am__DEPENDENCIES_1) \ +@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_1) libpcre2-8.la \ +@WITH_PCRE2_8_TRUE@ $(am__DEPENDENCIES_6) +pcre2grep_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2grep_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__pcre2posix_test_SOURCES_DIST = src/pcre2posix_test.c +@WITH_PCRE2_8_TRUE@am_pcre2posix_test_OBJECTS = src/pcre2posix_test-pcre2posix_test.$(OBJEXT) +pcre2posix_test_OBJECTS = $(am_pcre2posix_test_OBJECTS) +@WITH_PCRE2_8_TRUE@pcre2posix_test_DEPENDENCIES = libpcre2-posix.la \ +@WITH_PCRE2_8_TRUE@ libpcre2-8.la +pcre2posix_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(pcre2posix_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ + -o $@ +am_pcre2test_OBJECTS = src/pcre2test-pcre2test.$(OBJEXT) +pcre2test_OBJECTS = $(am_pcre2test_OBJECTS) +@WITH_GCOV_TRUE@am__DEPENDENCIES_7 = $(am__DEPENDENCIES_1) +pcre2test_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__append_37) \ + $(am__append_38) $(am__append_39) $(am__DEPENDENCIES_7) +pcre2test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(pcre2test_CFLAGS) \ + $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__dist_noinst_SCRIPTS_DIST = RunTest RunGrepTest +SCRIPTS = $(bin_SCRIPTS) $(dist_noinst_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) 
+am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo \ + 
src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo \ + src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo \ + src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo \ + 
src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo \ + src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo \ + src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo \ + src/$(DEPDIR)/pcre2_dftables.Po \ + src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po \ + src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po \ + src/$(DEPDIR)/pcre2grep-pcre2grep.Po \ + src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po \ + src/$(DEPDIR)/pcre2test-pcre2test.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = 
$(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(_libs_libpcre2_fuzzsupport_16_a_SOURCES) \ + $(_libs_libpcre2_fuzzsupport_32_a_SOURCES) \ + $(_libs_libpcre2_fuzzsupport_a_SOURCES) \ + $(libpcre2_16_la_SOURCES) $(nodist_libpcre2_16_la_SOURCES) \ + $(libpcre2_32_la_SOURCES) $(nodist_libpcre2_32_la_SOURCES) \ + $(libpcre2_8_la_SOURCES) $(nodist_libpcre2_8_la_SOURCES) \ + $(libpcre2_posix_la_SOURCES) $(pcre2_dftables_SOURCES) \ + $(pcre2_jit_test_SOURCES) $(pcre2fuzzcheck_16_SOURCES) \ + $(pcre2fuzzcheck_32_SOURCES) $(pcre2fuzzcheck_8_SOURCES) \ + $(pcre2grep_SOURCES) $(pcre2posix_test_SOURCES) \ + $(pcre2test_SOURCES) +DIST_SOURCES = $(am___libs_libpcre2_fuzzsupport_16_a_SOURCES_DIST) \ + $(am___libs_libpcre2_fuzzsupport_32_a_SOURCES_DIST) \ + $(am___libs_libpcre2_fuzzsupport_a_SOURCES_DIST) \ + $(am__libpcre2_16_la_SOURCES_DIST) \ + $(am__libpcre2_32_la_SOURCES_DIST) \ + $(am__libpcre2_8_la_SOURCES_DIST) \ + $(am__libpcre2_posix_la_SOURCES_DIST) \ + $(am__pcre2_dftables_SOURCES_DIST) \ + $(am__pcre2_jit_test_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_16_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_32_SOURCES_DIST) \ + $(am__pcre2fuzzcheck_8_SOURCES_DIST) \ + $(am__pcre2grep_SOURCES_DIST) \ + $(am__pcre2posix_test_SOURCES_DIST) $(pcre2test_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +man1dir = $(mandir)/man1 +man3dir = $(mandir)/man3 +NROFF = nroff +MANS = $(dist_man_MANS) +DATA = $(dist_doc_DATA) $(dist_html_DATA) $(pkgconfig_DATA) +HEADERS = $(include_HEADERS) $(nodist_include_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) 
$(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +AM_RECURSIVE_TARGETS = cscope check recheck +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red=''; \ + grn=''; \ + lgn=''; \ + blu=''; \ + mgn=''; \ + brg=''; \ + std=''; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". 
+am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. +am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. 
+am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(dist_man_MANS) $(srcdir)/Makefile.in \ + $(srcdir)/libpcre2-16.pc.in $(srcdir)/libpcre2-32.pc.in \ + $(srcdir)/libpcre2-8.pc.in $(srcdir)/libpcre2-posix.pc.in \ + $(srcdir)/pcre2-config.in $(top_srcdir)/src/config.h.in \ + $(top_srcdir)/src/pcre2.h.in AUTHORS.md COPYING ChangeLog \ + INSTALL NEWS README ar-lib compile config.guess config.sub \ + depcomp install-sh ltmain.sh missing test-driver +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +distdir = $(PACKAGE)-$(VERSION) +top_distdir = $(distdir) +am__remove_distdir = \ + if test -d "$(distdir)"; then \ + find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ + && rm -rf "$(distdir)" \ + || { sleep 5 && rm -rf "$(distdir)"; }; \ + else :; fi +am__post_remove_distdir = $(am__remove_distdir) +DIST_ARCHIVES = $(distdir).tar.gz $(distdir).tar.bz2 $(distdir).zip +GZIP_ENV = --best +DIST_TARGETS = dist-bzip2 dist-gzip dist-zip +# Exists only to be overridden by the user if desired. +AM_DISTCHECK_DVI_TARGET = dvi +distuninstallcheck_listfiles = find . 
-type f -print +am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ + | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' +distcleancheck_listfiles = find . -type f -print +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CET_CFLAGS = @CET_CFLAGS@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +EXTRA_LIBPCRE2_16_LDFLAGS = @EXTRA_LIBPCRE2_16_LDFLAGS@ +EXTRA_LIBPCRE2_32_LDFLAGS = @EXTRA_LIBPCRE2_32_LDFLAGS@ +EXTRA_LIBPCRE2_8_LDFLAGS = @EXTRA_LIBPCRE2_8_LDFLAGS@ +EXTRA_LIBPCRE2_POSIX_LDFLAGS = @EXTRA_LIBPCRE2_POSIX_LDFLAGS@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GCOV_CFLAGS = @GCOV_CFLAGS@ +GCOV_CXXFLAGS = @GCOV_CXXFLAGS@ +GCOV_LIBS = @GCOV_LIBS@ +GENHTML = @GENHTML@ +GREP = @GREP@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LCOV = @LCOV@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBBZ2 = @LIBBZ2@ +LIBOBJS = @LIBOBJS@ +LIBREADLINE = @LIBREADLINE@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIBZ = @LIBZ@ +LIB_POSTFIX = @LIB_POSTFIX@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = 
@PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PCRE2POSIX_CFLAG = @PCRE2POSIX_CFLAG@ +PCRE2_DATE = @PCRE2_DATE@ +PCRE2_MAJOR = @PCRE2_MAJOR@ +PCRE2_MINOR = @PCRE2_MINOR@ +PCRE2_PRERELEASE = @PCRE2_PRERELEASE@ +PCRE2_STATIC_CFLAG = @PCRE2_STATIC_CFLAG@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SHTOOL = @SHTOOL@ +STRIP = @STRIP@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ +VALGRIND_LIBS = @VALGRIND_LIBS@ +VERSION = @VERSION@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +enable_pcre2_16 = @enable_pcre2_16@ +enable_pcre2_32 = @enable_pcre2_32@ +enable_pcre2_8 = @enable_pcre2_8@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = 
@oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = subdir-objects +ACLOCAL_AMFLAGS = -I m4 +AM_CPPFLAGS = "-I$(srcdir)/src" +dist_doc_DATA = \ + AUTHORS.md \ + COPYING \ + ChangeLog \ + LICENCE.md \ + NEWS \ + README \ + SECURITY.md \ + doc/pcre2.txt \ + doc/pcre2-config.txt \ + doc/pcre2grep.txt \ + doc/pcre2test.txt + +dist_html_DATA = \ + doc/html/NON-AUTOTOOLS-BUILD.txt \ + doc/html/README.txt \ + doc/html/index.html \ + doc/html/pcre2-config.html \ + doc/html/pcre2.html \ + doc/html/pcre2_callout_enumerate.html \ + doc/html/pcre2_code_copy.html \ + doc/html/pcre2_code_copy_with_tables.html \ + doc/html/pcre2_code_free.html \ + doc/html/pcre2_compile.html \ + doc/html/pcre2_compile_context_copy.html \ + doc/html/pcre2_compile_context_create.html \ + doc/html/pcre2_compile_context_free.html \ + doc/html/pcre2_config.html \ + doc/html/pcre2_convert_context_copy.html \ + doc/html/pcre2_convert_context_create.html \ + doc/html/pcre2_convert_context_free.html \ + doc/html/pcre2_converted_pattern_free.html \ + doc/html/pcre2_dfa_match.html \ + doc/html/pcre2_general_context_copy.html \ + doc/html/pcre2_general_context_create.html \ + doc/html/pcre2_general_context_free.html \ + doc/html/pcre2_get_error_message.html \ + doc/html/pcre2_get_mark.html \ + doc/html/pcre2_get_match_data_heapframes_size.html \ + doc/html/pcre2_get_match_data_size.html \ + doc/html/pcre2_get_ovector_count.html \ + doc/html/pcre2_get_ovector_pointer.html \ + doc/html/pcre2_get_startchar.html \ + doc/html/pcre2_jit_compile.html \ + doc/html/pcre2_jit_free_unused_memory.html \ + doc/html/pcre2_jit_match.html \ + doc/html/pcre2_jit_stack_assign.html \ 
+ doc/html/pcre2_jit_stack_create.html \ + doc/html/pcre2_jit_stack_free.html \ + doc/html/pcre2_maketables.html \ + doc/html/pcre2_maketables_free.html \ + doc/html/pcre2_match.html \ + doc/html/pcre2_match_context_copy.html \ + doc/html/pcre2_match_context_create.html \ + doc/html/pcre2_match_context_free.html \ + doc/html/pcre2_match_data_create.html \ + doc/html/pcre2_match_data_create_from_pattern.html \ + doc/html/pcre2_match_data_free.html \ + doc/html/pcre2_pattern_convert.html \ + doc/html/pcre2_pattern_info.html \ + doc/html/pcre2_serialize_decode.html \ + doc/html/pcre2_serialize_encode.html \ + doc/html/pcre2_serialize_free.html \ + doc/html/pcre2_serialize_get_number_of_codes.html \ + doc/html/pcre2_set_bsr.html \ + doc/html/pcre2_set_callout.html \ + doc/html/pcre2_set_character_tables.html \ + doc/html/pcre2_set_compile_extra_options.html \ + doc/html/pcre2_set_compile_recursion_guard.html \ + doc/html/pcre2_set_depth_limit.html \ + doc/html/pcre2_set_glob_escape.html \ + doc/html/pcre2_set_glob_separator.html \ + doc/html/pcre2_set_heap_limit.html \ + doc/html/pcre2_set_match_limit.html \ + doc/html/pcre2_set_max_pattern_compiled_length.html \ + doc/html/pcre2_set_max_pattern_length.html \ + doc/html/pcre2_set_max_varlookbehind.html \ + doc/html/pcre2_set_offset_limit.html \ + doc/html/pcre2_set_optimize.html \ + doc/html/pcre2_set_newline.html \ + doc/html/pcre2_set_parens_nest_limit.html \ + doc/html/pcre2_set_recursion_limit.html \ + doc/html/pcre2_set_recursion_memory_management.html \ + doc/html/pcre2_set_substitute_callout.html \ + doc/html/pcre2_set_substitute_case_callout.html \ + doc/html/pcre2_substitute.html \ + doc/html/pcre2_substring_copy_byname.html \ + doc/html/pcre2_substring_copy_bynumber.html \ + doc/html/pcre2_substring_free.html \ + doc/html/pcre2_substring_get_byname.html \ + doc/html/pcre2_substring_get_bynumber.html \ + doc/html/pcre2_substring_length_byname.html \ + doc/html/pcre2_substring_length_bynumber.html \ + 
doc/html/pcre2_substring_list_free.html \ + doc/html/pcre2_substring_list_get.html \ + doc/html/pcre2_substring_nametable_scan.html \ + doc/html/pcre2_substring_number_from_name.html \ + doc/html/pcre2api.html \ + doc/html/pcre2build.html \ + doc/html/pcre2callout.html \ + doc/html/pcre2compat.html \ + doc/html/pcre2convert.html \ + doc/html/pcre2demo.html \ + doc/html/pcre2grep.html \ + doc/html/pcre2jit.html \ + doc/html/pcre2limits.html \ + doc/html/pcre2matching.html \ + doc/html/pcre2partial.html \ + doc/html/pcre2pattern.html \ + doc/html/pcre2perform.html \ + doc/html/pcre2posix.html \ + doc/html/pcre2sample.html \ + doc/html/pcre2serialize.html \ + doc/html/pcre2syntax.html \ + doc/html/pcre2test.html \ + doc/html/pcre2unicode.html + +dist_man_MANS = \ + doc/pcre2-config.1 \ + doc/pcre2.3 \ + doc/pcre2_callout_enumerate.3 \ + doc/pcre2_code_copy.3 \ + doc/pcre2_code_copy_with_tables.3 \ + doc/pcre2_code_free.3 \ + doc/pcre2_compile.3 \ + doc/pcre2_compile_context_copy.3 \ + doc/pcre2_compile_context_create.3 \ + doc/pcre2_compile_context_free.3 \ + doc/pcre2_config.3 \ + doc/pcre2_convert_context_copy.3 \ + doc/pcre2_convert_context_create.3 \ + doc/pcre2_convert_context_free.3 \ + doc/pcre2_converted_pattern_free.3 \ + doc/pcre2_dfa_match.3 \ + doc/pcre2_general_context_copy.3 \ + doc/pcre2_general_context_create.3 \ + doc/pcre2_general_context_free.3 \ + doc/pcre2_get_error_message.3 \ + doc/pcre2_get_mark.3 \ + doc/pcre2_get_match_data_heapframes_size.3 \ + doc/pcre2_get_match_data_size.3 \ + doc/pcre2_get_ovector_count.3 \ + doc/pcre2_get_ovector_pointer.3 \ + doc/pcre2_get_startchar.3 \ + doc/pcre2_jit_compile.3 \ + doc/pcre2_jit_free_unused_memory.3 \ + doc/pcre2_jit_match.3 \ + doc/pcre2_jit_stack_assign.3 \ + doc/pcre2_jit_stack_create.3 \ + doc/pcre2_jit_stack_free.3 \ + doc/pcre2_maketables.3 \ + doc/pcre2_maketables_free.3 \ + doc/pcre2_match.3 \ + doc/pcre2_match_context_copy.3 \ + doc/pcre2_match_context_create.3 \ + 
doc/pcre2_match_context_free.3 \ + doc/pcre2_match_data_create.3 \ + doc/pcre2_match_data_create_from_pattern.3 \ + doc/pcre2_match_data_free.3 \ + doc/pcre2_pattern_convert.3 \ + doc/pcre2_pattern_info.3 \ + doc/pcre2_serialize_decode.3 \ + doc/pcre2_serialize_encode.3 \ + doc/pcre2_serialize_free.3 \ + doc/pcre2_serialize_get_number_of_codes.3 \ + doc/pcre2_set_bsr.3 \ + doc/pcre2_set_callout.3 \ + doc/pcre2_set_character_tables.3 \ + doc/pcre2_set_compile_extra_options.3 \ + doc/pcre2_set_compile_recursion_guard.3 \ + doc/pcre2_set_depth_limit.3 \ + doc/pcre2_set_glob_escape.3 \ + doc/pcre2_set_glob_separator.3 \ + doc/pcre2_set_heap_limit.3 \ + doc/pcre2_set_match_limit.3 \ + doc/pcre2_set_max_pattern_compiled_length.3 \ + doc/pcre2_set_max_pattern_length.3 \ + doc/pcre2_set_max_varlookbehind.3 \ + doc/pcre2_set_offset_limit.3 \ + doc/pcre2_set_optimize.3 \ + doc/pcre2_set_newline.3 \ + doc/pcre2_set_parens_nest_limit.3 \ + doc/pcre2_set_recursion_limit.3 \ + doc/pcre2_set_recursion_memory_management.3 \ + doc/pcre2_set_substitute_callout.3 \ + doc/pcre2_set_substitute_case_callout.3 \ + doc/pcre2_substitute.3 \ + doc/pcre2_substring_copy_byname.3 \ + doc/pcre2_substring_copy_bynumber.3 \ + doc/pcre2_substring_free.3 \ + doc/pcre2_substring_get_byname.3 \ + doc/pcre2_substring_get_bynumber.3 \ + doc/pcre2_substring_length_byname.3 \ + doc/pcre2_substring_length_bynumber.3 \ + doc/pcre2_substring_list_free.3 \ + doc/pcre2_substring_list_get.3 \ + doc/pcre2_substring_nametable_scan.3 \ + doc/pcre2_substring_number_from_name.3 \ + doc/pcre2api.3 \ + doc/pcre2build.3 \ + doc/pcre2callout.3 \ + doc/pcre2compat.3 \ + doc/pcre2convert.3 \ + doc/pcre2demo.3 \ + doc/pcre2grep.1 \ + doc/pcre2jit.3 \ + doc/pcre2limits.3 \ + doc/pcre2matching.3 \ + doc/pcre2partial.3 \ + doc/pcre2pattern.3 \ + doc/pcre2perform.3 \ + doc/pcre2posix.3 \ + doc/pcre2sample.3 \ + doc/pcre2serialize.3 \ + doc/pcre2syntax.3 \ + doc/pcre2test.1 \ + doc/pcre2unicode.3 + + +# The Libtool libraries 
to install. We'll add to this later. +lib_LTLIBRARIES = $(am__append_2) $(am__append_3) $(am__append_4) \ + $(am__append_11) +check_SCRIPTS = +dist_noinst_SCRIPTS = RunTest $(am__append_45) + +# Additional files to delete on 'make clean', 'make distclean', +# and 'make maintainer-clean'. It turns out that the default is to delete only +# those binaries that *this* configuration has created. If the configuration +# has been changed, some binaries may not get automatically deleted. Therefore +# we list them here. + +# RunTest and RunGrepTest should clean up after themselves, but just in case +# they don't, add their working files to CLEANFILES. +CLEANFILES = pcre2_dftables pcre2_jit_test pcre2fuzzcheck-8 \ + pcre2fuzzcheck-16 pcre2fuzzcheck-32 pcre2demo \ + src/pcre2_chartables.c testSinput test3input test3output \ + test3outputA test3outputB testtry teststdout teststderr \ + teststderrgrep testtemp1grep testtemp2grep testtrygrep \ + testNinputgrep +DISTCLEANFILES = src/config.h.in~ $(am__append_49) +MAINTAINERCLEANFILES = src/pcre2.h.generic src/config.h.generic + +# Additional files to bundle with the distribution, over and above what +# the Autotools include by default. + +# These files contain additional m4 macros that are used by autoconf. + +# These files contain maintenance information + +# These are support files for building with Bazel or Zig + +# These are support files for building under VMS + +# These files are usable versions of pcre2.h and config.h that are distributed +# for the benefit of people who are building PCRE2 manually, without the +# Autotools support. + +# The pcre2_ucptables.c file is #included by pcre2_tables.c + +# The pcre2_chartables.c.dist file is the default version of +# pcre2_chartables.c, used unless --enable-rebuild-chartables is specified. + +# The JIT compiler lives in a separate directory, but its files are #included +# when pcre2_jit_compile.c is processed, so they must be distributed. 
+ +# Some of the JIT sources are also in separate files that are #included. + +# PCRE2 demonstration program. Not built automatically. The point is that the +# users should build it themselves. So just distribute the source. +EXTRA_DIST = m4/ax_pthread.m4 m4/pcre2_visibility.m4 \ + NON-AUTOTOOLS-BUILD HACKING BUILD.bazel MODULE.bazel \ + WORKSPACE.bazel build.zig vms/configure.com \ + vms/openvms_readme.txt vms/pcre2.h_patch vms/stdint.h \ + src/pcre2.h.generic src/config.h.generic src/pcre2_ucptables.c \ + src/pcre2_chartables.c.dist deps/sljit/sljit_src/sljitConfig.h \ + deps/sljit/sljit_src/sljitConfigCPU.h \ + deps/sljit/sljit_src/sljitConfigInternal.h \ + deps/sljit/sljit_src/sljitLir.c \ + deps/sljit/sljit_src/sljitLir.h \ + deps/sljit/sljit_src/sljitNativeARM_32.c \ + deps/sljit/sljit_src/sljitNativeARM_64.c \ + deps/sljit/sljit_src/sljitNativeARM_T2_32.c \ + deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_32.c \ + deps/sljit/sljit_src/sljitNativeMIPS_64.c \ + deps/sljit/sljit_src/sljitNativeMIPS_common.c \ + deps/sljit/sljit_src/sljitNativePPC_32.c \ + deps/sljit/sljit_src/sljitNativePPC_64.c \ + deps/sljit/sljit_src/sljitNativePPC_common.c \ + deps/sljit/sljit_src/sljitNativeRISCV_32.c \ + deps/sljit/sljit_src/sljitNativeRISCV_64.c \ + deps/sljit/sljit_src/sljitNativeRISCV_common.c \ + deps/sljit/sljit_src/sljitNativeS390X.c \ + deps/sljit/sljit_src/sljitNativeX86_32.c \ + deps/sljit/sljit_src/sljitNativeX86_64.c \ + deps/sljit/sljit_src/sljitNativeX86_common.c \ + deps/sljit/sljit_src/sljitSerialize.c \ + deps/sljit/sljit_src/sljitUtils.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c \ + 
deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c \ + deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c \ + deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c \ + src/pcre2_jit_match.c src/pcre2_jit_misc.c \ + src/pcre2_printint.c RunTest.bat $(am__append_44) \ + testdata/grepbinary testdata/grepfilelist testdata/grepinput \ + testdata/grepinput3 testdata/grepinput8 testdata/grepinputBad8 \ + testdata/grepinputBad8_Trail testdata/grepinputC.bz2 \ + testdata/grepinputC.gz testdata/grepinputM \ + testdata/grepinputUN testdata/grepinputv testdata/grepinputx \ + testdata/greplist testdata/grepnot.bz2 testdata/grepoutput \ + testdata/grepoutput8 testdata/grepoutputC \ + testdata/grepoutputCN testdata/grepoutputCNU \ + testdata/grepoutputCU testdata/grepoutputCbz2 \ + testdata/grepoutputCgz testdata/grepoutputN \ + testdata/grepoutputUN testdata/greppatN4 testdata/testbtables \ + testdata/testinput1 testdata/testinput2 testdata/testinput3 \ + testdata/testinput4 testdata/testinput5 testdata/testinput6 \ + testdata/testinput7 testdata/testinput8 testdata/testinput9 \ + testdata/testinput10 testdata/testinput11 testdata/testinput12 \ + testdata/testinput13 testdata/testinput14 testdata/testinput15 \ + testdata/testinput16 testdata/testinput17 testdata/testinput18 \ + testdata/testinput19 testdata/testinput20 testdata/testinput21 \ + testdata/testinput22 testdata/testinput23 testdata/testinput24 \ + testdata/testinput25 testdata/testinput26 testdata/testinput27 \ + testdata/testinputEBC testdata/testinputheap \ + testdata/testoutput1 testdata/testoutput2 testdata/testoutput3 \ + testdata/testoutput3A testdata/testoutput3B \ + testdata/testoutput4 testdata/testoutput5 testdata/testoutput6 \ + testdata/testoutput7 testdata/testoutput8-16-2 \ + testdata/testoutput8-16-3 testdata/testoutput8-16-4 \ + testdata/testoutput8-32-2 testdata/testoutput8-32-3 \ + 
testdata/testoutput8-32-4 testdata/testoutput8-8-2 \ + testdata/testoutput8-8-3 testdata/testoutput8-8-4 \ + testdata/testoutput9 testdata/testoutput10 \ + testdata/testoutput11-16 testdata/testoutput11-32 \ + testdata/testoutput12-16 testdata/testoutput12-32 \ + testdata/testoutput13 testdata/testoutput14-16 \ + testdata/testoutput14-32 testdata/testoutput14-8 \ + testdata/testoutput15 testdata/testoutput16 \ + testdata/testoutput17 testdata/testoutput18 \ + testdata/testoutput19 testdata/testoutput20 \ + testdata/testoutput21 testdata/testoutput22-16 \ + testdata/testoutput22-32 testdata/testoutput22-8 \ + testdata/testoutput23 testdata/testoutput24 \ + testdata/testoutput25 testdata/testoutput26 \ + testdata/testoutput27 testdata/testoutputEBC \ + testdata/testoutputheap-16 testdata/testoutputheap-32 \ + testdata/testoutputheap-8 testdata/valgrind-jit.supp \ + testdata/wintestinput3 testdata/wintestoutput3 perltest.sh \ + src/pcre2demo.c cmake/COPYING-CMAKE-SCRIPTS \ + cmake/FindEditline.cmake cmake/FindReadline.cmake \ + cmake/pcre2-config-version.cmake.in \ + cmake/pcre2-config.cmake.in CMakeLists.txt config-cmake.h.in + +# These are the header files we'll install. We do not distribute pcre2.h +# because it is generated from pcre2.h.in. +nodist_include_HEADERS = src/pcre2.h +include_HEADERS = src/pcre2posix.h + +# This is the "config" script. 
+bin_SCRIPTS = pcre2-config +@WITH_REBUILD_CHARTABLES_TRUE@pcre2_dftables_SOURCES = src/pcre2_dftables.c +BUILT_SOURCES = src/pcre2_chartables.c +NODIST_SOURCES = src/pcre2_chartables.c +COMMON_SOURCES = \ + src/pcre2_auto_possess.c \ + src/pcre2_chkdint.c \ + src/pcre2_compile.c \ + src/pcre2_compile.h \ + src/pcre2_compile_class.c \ + src/pcre2_config.c \ + src/pcre2_context.c \ + src/pcre2_convert.c \ + src/pcre2_dfa_match.c \ + src/pcre2_error.c \ + src/pcre2_extuni.c \ + src/pcre2_find_bracket.c \ + src/pcre2_internal.h \ + src/pcre2_intmodedep.h \ + src/pcre2_jit_char_inc.h \ + src/pcre2_jit_compile.c \ + src/pcre2_jit_neon_inc.h \ + src/pcre2_jit_simd_inc.h \ + src/pcre2_maketables.c \ + src/pcre2_match.c \ + src/pcre2_match_data.c \ + src/pcre2_newline.c \ + src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c \ + src/pcre2_script_run.c \ + src/pcre2_serialize.c \ + src/pcre2_string_utils.c \ + src/pcre2_study.c \ + src/pcre2_substitute.c \ + src/pcre2_substring.c \ + src/pcre2_tables.c \ + src/pcre2_ucd.c \ + src/pcre2_ucp.h \ + src/pcre2_util.h \ + src/pcre2_valid_utf.c \ + src/pcre2_xclass.c + +@WITH_PCRE2_8_TRUE@libpcre2_8_la_SOURCES = \ +@WITH_PCRE2_8_TRUE@ $(COMMON_SOURCES) + +@WITH_PCRE2_8_TRUE@nodist_libpcre2_8_la_SOURCES = \ +@WITH_PCRE2_8_TRUE@ $(NODIST_SOURCES) + +@WITH_PCRE2_8_TRUE@libpcre2_8_la_CFLAGS = -DPCRE2_CODE_UNIT_WIDTH=8 \ +@WITH_PCRE2_8_TRUE@ $(VISIBILITY_CFLAGS) $(CET_CFLAGS) \ +@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) $(am__append_5) \ +@WITH_PCRE2_8_TRUE@ $(am__append_8) +@WITH_PCRE2_8_TRUE@libpcre2_8_la_LIBADD = +@WITH_PCRE2_16_TRUE@libpcre2_16_la_SOURCES = \ +@WITH_PCRE2_16_TRUE@ $(COMMON_SOURCES) + +@WITH_PCRE2_16_TRUE@nodist_libpcre2_16_la_SOURCES = \ +@WITH_PCRE2_16_TRUE@ $(NODIST_SOURCES) + +@WITH_PCRE2_16_TRUE@libpcre2_16_la_CFLAGS = \ +@WITH_PCRE2_16_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=16 \ +@WITH_PCRE2_16_TRUE@ $(VISIBILITY_CFLAGS) $(CET_CFLAGS) \ +@WITH_PCRE2_16_TRUE@ $(AM_CFLAGS) $(am__append_6) \ +@WITH_PCRE2_16_TRUE@ $(am__append_9) 
+@WITH_PCRE2_16_TRUE@libpcre2_16_la_LIBADD = +@WITH_PCRE2_32_TRUE@libpcre2_32_la_SOURCES = \ +@WITH_PCRE2_32_TRUE@ $(COMMON_SOURCES) + +@WITH_PCRE2_32_TRUE@nodist_libpcre2_32_la_SOURCES = \ +@WITH_PCRE2_32_TRUE@ $(NODIST_SOURCES) + +@WITH_PCRE2_32_TRUE@libpcre2_32_la_CFLAGS = \ +@WITH_PCRE2_32_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=32 \ +@WITH_PCRE2_32_TRUE@ $(VISIBILITY_CFLAGS) $(CET_CFLAGS) \ +@WITH_PCRE2_32_TRUE@ $(AM_CFLAGS) $(am__append_7) \ +@WITH_PCRE2_32_TRUE@ $(am__append_10) +@WITH_PCRE2_32_TRUE@libpcre2_32_la_LIBADD = +@WITH_PCRE2_8_TRUE@libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS) +@WITH_PCRE2_16_TRUE@libpcre2_16_la_LDFLAGS = $(EXTRA_LIBPCRE2_16_LDFLAGS) +@WITH_PCRE2_32_TRUE@libpcre2_32_la_LDFLAGS = $(EXTRA_LIBPCRE2_32_LDFLAGS) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_SOURCES = src/pcre2posix.c +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_CFLAGS = \ +@WITH_PCRE2_8_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=8 \ +@WITH_PCRE2_8_TRUE@ @PCRE2POSIX_CFLAG@ $(VISIBILITY_CFLAGS) \ +@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) $(am__append_12) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS) +@WITH_PCRE2_8_TRUE@libpcre2_posix_la_LIBADD = libpcre2-8.la +@WITH_PCRE2_8_TRUE@pcre2grep_SOURCES = src/pcre2grep.c +@WITH_PCRE2_8_TRUE@pcre2grep_CFLAGS = $(AM_CFLAGS) $(am__append_14) +@WITH_PCRE2_8_TRUE@pcre2grep_LDADD = $(LIBZ) $(LIBBZ2) libpcre2-8.la \ +@WITH_PCRE2_8_TRUE@ $(am__append_15) +@WITH_FUZZ_SUPPORT_TRUE@noinst_LIBRARIES = $(am__append_16) \ +@WITH_FUZZ_SUPPORT_TRUE@ $(am__append_20) $(am__append_24) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@_libs_libpcre2_fuzzsupport_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_CFLAGS = \ 
+@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ -DSTANDALONE \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(AM_CFLAGS) \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__append_18) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@pcre2fuzzcheck_8_LDADD = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ libpcre2-8.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_8_TRUE@ $(am__append_19) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@_libs_libpcre2_fuzzsupport_16_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_CFLAGS = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ -DSTANDALONE \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(AM_CFLAGS) \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ -DPCRE2_CODE_UNIT_WIDTH=16 \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(am__append_22) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@pcre2fuzzcheck_16_LDADD = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ libpcre2-16.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_16_TRUE@ $(am__append_23) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@_libs_libpcre2_fuzzsupport_32_a_LIBADD = +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_CFLAGS = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ -DSTANDALONE \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(AM_CFLAGS) \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ 
-DPCRE2_CODE_UNIT_WIDTH=32 \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(am__append_26) +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@pcre2fuzzcheck_32_LDADD = \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ libpcre2-32.la \ +@WITH_FUZZ_SUPPORT_TRUE@@WITH_PCRE2_32_TRUE@ $(am__append_27) +@WITH_PCRE2_8_TRUE@pcre2posix_test_SOURCES = src/pcre2posix_test.c +@WITH_PCRE2_8_TRUE@pcre2posix_test_CFLAGS = $(AM_CFLAGS) @PCRE2POSIX_CFLAG@ +@WITH_PCRE2_8_TRUE@pcre2posix_test_LDADD = libpcre2-posix.la libpcre2-8.la +@WITH_JIT_TRUE@pcre2_jit_test_SOURCES = src/pcre2_jit_test.c +@WITH_JIT_TRUE@pcre2_jit_test_CFLAGS = $(AM_CFLAGS) $(am__append_35) +@WITH_JIT_TRUE@pcre2_jit_test_LDADD = $(am__append_32) \ +@WITH_JIT_TRUE@ $(am__append_33) $(am__append_34) \ +@WITH_JIT_TRUE@ $(am__append_36) +pcre2test_SOURCES = src/pcre2test.c +pcre2test_CFLAGS = $(AM_CFLAGS) $(am__append_40) $(am__append_41) +pcre2test_LDADD = $(LIBREADLINE) $(am__append_37) $(am__append_38) \ + $(am__append_39) $(am__append_42) + +# We have .pc files for pkg-config users. +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = $(am__append_46) $(am__append_47) $(am__append_48) + +# gcov/lcov code coverage reporting +# +# Coverage reporting targets: +# +# coverage: Create a coverage report from 'make check' +# coverage-baseline: Capture baseline coverage information +# coverage-reset: This zeros the coverage counters only +# coverage-report: This creates the coverage report only +# coverage-clean-report: This removes the generated coverage report +# without cleaning the coverage data itself +# coverage-clean-data: This removes the captured coverage data without +# removing the coverage files created at compile time (*.gcno) +# coverage-clean: This cleans all coverage data including the generated +# coverage report. 
+@WITH_GCOV_TRUE@COVERAGE_TEST_NAME = $(PACKAGE) +@WITH_GCOV_TRUE@COVERAGE_NAME = $(PACKAGE)-$(VERSION) +@WITH_GCOV_TRUE@COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info +@WITH_GCOV_TRUE@COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage +@WITH_GCOV_TRUE@COVERAGE_LCOV_EXTRA_FLAGS = +@WITH_GCOV_TRUE@COVERAGE_GENHTML_EXTRA_FLAGS = +@WITH_GCOV_TRUE@coverage_quiet = $(coverage_quiet_$(V)) +@WITH_GCOV_TRUE@coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY)) +@WITH_GCOV_TRUE@coverage_quiet_0 = --quiet +all: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .log .o .obj .test .test$(EXEEXT) .trs +am--refresh: Makefile + @: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \ + $(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + echo ' $(SHELL) ./config.status'; \ + $(SHELL) ./config.status;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + $(SHELL) ./config.status --recheck + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + $(am__cd) $(srcdir) && $(AUTOCONF) +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) +$(am__aclocal_m4_deps): + +src/config.h: src/stamp-h1 + @test -f $@ || rm -f src/stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) src/stamp-h1 + +src/stamp-h1: $(top_srcdir)/src/config.h.in $(top_builddir)/config.status + @rm -f src/stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status src/config.h +$(top_srcdir)/src/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) + rm -f src/stamp-h1 + touch $@ + +distclean-hdr: + -rm -f src/config.h src/stamp-h1 +libpcre2-8.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-8.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +libpcre2-16.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-16.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +libpcre2-32.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-32.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +libpcre2-posix.pc: $(top_builddir)/config.status $(srcdir)/libpcre2-posix.pc.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +pcre2-config: $(top_builddir)/config.status $(srcdir)/pcre2-config.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +src/pcre2.h: $(top_builddir)/config.status $(top_srcdir)/src/pcre2.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; 
test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; 
\ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +clean-noinstLIBRARIES: + -test -z "$(noinst_LIBRARIES)" || rm -f $(noinst_LIBRARIES) + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } +src/$(am__dirstamp): + @$(MKDIR_P) src + @: > src/$(am__dirstamp) +src/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/$(DEPDIR) + @: > src/$(DEPDIR)/$(am__dirstamp) +src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) +.libs/$(am__dirstamp): + @$(MKDIR_P) .libs + @: > .libs/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport-16.a: $(_libs_libpcre2_fuzzsupport_16_a_OBJECTS) 
$(_libs_libpcre2_fuzzsupport_16_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_16_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport-16.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_16_a_AR) .libs/libpcre2-fuzzsupport-16.a $(_libs_libpcre2_fuzzsupport_16_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_16_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport-16.a +src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport-32.a: $(_libs_libpcre2_fuzzsupport_32_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_32_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_32_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport-32.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_32_a_AR) .libs/libpcre2-fuzzsupport-32.a $(_libs_libpcre2_fuzzsupport_32_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_32_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport-32.a +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +.libs/libpcre2-fuzzsupport.a: $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_DEPENDENCIES) $(EXTRA__libs_libpcre2_fuzzsupport_a_DEPENDENCIES) .libs/$(am__dirstamp) + $(AM_V_at)-rm -f .libs/libpcre2-fuzzsupport.a + $(AM_V_AR)$(_libs_libpcre2_fuzzsupport_a_AR) .libs/libpcre2-fuzzsupport.a $(_libs_libpcre2_fuzzsupport_a_OBJECTS) $(_libs_libpcre2_fuzzsupport_a_LIBADD) + $(AM_V_at)$(RANLIB) .libs/libpcre2-fuzzsupport.a +src/libpcre2_16_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_config.lo: src/$(am__dirstamp) \ + 
src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_context.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_convert.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_error.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_extuni.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_maketables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_match_data.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_newline.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_script_run.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_serialize.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_string_utils.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_study.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_substitute.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_substring.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_tables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_ucd.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) 
+src/libpcre2_16_la-pcre2_valid_utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_xclass.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_16_la-pcre2_chartables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-16.la: $(libpcre2_16_la_OBJECTS) $(libpcre2_16_la_DEPENDENCIES) $(EXTRA_libpcre2_16_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_16_la_LINK) $(am_libpcre2_16_la_rpath) $(libpcre2_16_la_OBJECTS) $(libpcre2_16_la_LIBADD) $(LIBS) +src/libpcre2_32_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_config.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_context.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_convert.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_error.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_extuni.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_maketables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_match_data.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_newline.lo: src/$(am__dirstamp) \ + 
src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_script_run.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_serialize.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_string_utils.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_study.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_substitute.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_substring.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_tables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_ucd.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_valid_utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_xclass.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_32_la-pcre2_chartables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-32.la: $(libpcre2_32_la_OBJECTS) $(libpcre2_32_la_DEPENDENCIES) $(EXTRA_libpcre2_32_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_32_la_LINK) $(am_libpcre2_32_la_rpath) $(libpcre2_32_la_OBJECTS) $(libpcre2_32_la_LIBADD) $(LIBS) +src/libpcre2_8_la-pcre2_auto_possess.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_chkdint.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_compile_class.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_config.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_context.lo: 
src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_convert.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_dfa_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_error.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_extuni.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_find_bracket.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_jit_compile.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_maketables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_match.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_match_data.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_newline.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_ord2utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_pattern_info.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_script_run.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_serialize.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_string_utils.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_study.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_substitute.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_substring.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_tables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_ucd.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_valid_utf.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) 
+src/libpcre2_8_la-pcre2_xclass.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) +src/libpcre2_8_la-pcre2_chartables.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-8.la: $(libpcre2_8_la_OBJECTS) $(libpcre2_8_la_DEPENDENCIES) $(EXTRA_libpcre2_8_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_8_la_LINK) $(am_libpcre2_8_la_rpath) $(libpcre2_8_la_OBJECTS) $(libpcre2_8_la_LIBADD) $(LIBS) +src/libpcre2_posix_la-pcre2posix.lo: src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +libpcre2-posix.la: $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_DEPENDENCIES) $(EXTRA_libpcre2_posix_la_DEPENDENCIES) + $(AM_V_CCLD)$(libpcre2_posix_la_LINK) $(am_libpcre2_posix_la_rpath) $(libpcre2_posix_la_OBJECTS) $(libpcre2_posix_la_LIBADD) $(LIBS) +src/pcre2_dftables.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2_dftables$(EXEEXT): $(pcre2_dftables_OBJECTS) $(pcre2_dftables_DEPENDENCIES) $(EXTRA_pcre2_dftables_DEPENDENCIES) + @rm -f pcre2_dftables$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(pcre2_dftables_OBJECTS) $(pcre2_dftables_LDADD) $(LIBS) +src/pcre2_jit_test-pcre2_jit_test.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2_jit_test$(EXEEXT): $(pcre2_jit_test_OBJECTS) $(pcre2_jit_test_DEPENDENCIES) $(EXTRA_pcre2_jit_test_DEPENDENCIES) + @rm -f pcre2_jit_test$(EXEEXT) + $(AM_V_CCLD)$(pcre2_jit_test_LINK) $(pcre2_jit_test_OBJECTS) $(pcre2_jit_test_LDADD) $(LIBS) +src/pcre2fuzzcheck_16-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck-16$(EXEEXT): $(pcre2fuzzcheck_16_OBJECTS) $(pcre2fuzzcheck_16_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_16_DEPENDENCIES) + @rm -f pcre2fuzzcheck-16$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_16_LINK) $(pcre2fuzzcheck_16_OBJECTS) $(pcre2fuzzcheck_16_LDADD) $(LIBS) +src/pcre2fuzzcheck_32-pcre2_fuzzsupport.$(OBJEXT): \ + src/$(am__dirstamp) src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck-32$(EXEEXT): $(pcre2fuzzcheck_32_OBJECTS) 
$(pcre2fuzzcheck_32_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_32_DEPENDENCIES) + @rm -f pcre2fuzzcheck-32$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_32_LINK) $(pcre2fuzzcheck_32_OBJECTS) $(pcre2fuzzcheck_32_LDADD) $(LIBS) +src/pcre2fuzzcheck_8-pcre2_fuzzsupport.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2fuzzcheck-8$(EXEEXT): $(pcre2fuzzcheck_8_OBJECTS) $(pcre2fuzzcheck_8_DEPENDENCIES) $(EXTRA_pcre2fuzzcheck_8_DEPENDENCIES) + @rm -f pcre2fuzzcheck-8$(EXEEXT) + $(AM_V_CCLD)$(pcre2fuzzcheck_8_LINK) $(pcre2fuzzcheck_8_OBJECTS) $(pcre2fuzzcheck_8_LDADD) $(LIBS) +src/pcre2grep-pcre2grep.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2grep$(EXEEXT): $(pcre2grep_OBJECTS) $(pcre2grep_DEPENDENCIES) $(EXTRA_pcre2grep_DEPENDENCIES) + @rm -f pcre2grep$(EXEEXT) + $(AM_V_CCLD)$(pcre2grep_LINK) $(pcre2grep_OBJECTS) $(pcre2grep_LDADD) $(LIBS) +src/pcre2posix_test-pcre2posix_test.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2posix_test$(EXEEXT): $(pcre2posix_test_OBJECTS) $(pcre2posix_test_DEPENDENCIES) $(EXTRA_pcre2posix_test_DEPENDENCIES) + @rm -f pcre2posix_test$(EXEEXT) + $(AM_V_CCLD)$(pcre2posix_test_LINK) $(pcre2posix_test_OBJECTS) $(pcre2posix_test_LDADD) $(LIBS) +src/pcre2test-pcre2test.$(OBJEXT): src/$(am__dirstamp) \ + src/$(DEPDIR)/$(am__dirstamp) + +pcre2test$(EXEEXT): $(pcre2test_OBJECTS) $(pcre2test_DEPENDENCIES) $(EXTRA_pcre2test_DEPENDENCIES) + @rm -f pcre2test$(EXEEXT) + $(AM_V_CCLD)$(pcre2test_LINK) $(pcre2test_OBJECTS) $(pcre2test_LDADD) $(LIBS) +install-binSCRIPTS: $(bin_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + 
-e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + -rm -f src/*.$(OBJEXT) + -rm -f src/*.lo + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/$(DEPDIR)/pcre2_dftables.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2grep-pcre2grep.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/$(DEPDIR)/pcre2test-pcre2test.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_16_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_32_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + 
+src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -MT src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(_libs_libpcre2_fuzzsupport_a_CFLAGS) $(CFLAGS) -c -o src/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/libpcre2_16_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_16_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_auto_possess.c' object='src/libpcre2_16_la-pcre2_auto_possess.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c + +src/libpcre2_16_la-pcre2_chkdint.lo: src/pcre2_chkdint.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_chkdint.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Tpo -c -o src/libpcre2_16_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chkdint.c' object='src/libpcre2_16_la-pcre2_chkdint.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c + +src/libpcre2_16_la-pcre2_compile.lo: src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Tpo -c -o src/libpcre2_16_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile.c' object='src/libpcre2_16_la-pcre2_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c + +src/libpcre2_16_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Tpo -c -o src/libpcre2_16_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_16_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + +src/libpcre2_16_la-pcre2_config.lo: src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT 
src/libpcre2_16_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo -c -o src/libpcre2_16_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_config.c' object='src/libpcre2_16_la-pcre2_config.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c + +src/libpcre2_16_la-pcre2_context.lo: src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_context.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Tpo -c -o src/libpcre2_16_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_context.c' object='src/libpcre2_16_la-pcre2_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c + +src/libpcre2_16_la-pcre2_convert.lo: src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_convert.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Tpo -c -o src/libpcre2_16_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_convert.c' object='src/libpcre2_16_la-pcre2_convert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c + +src/libpcre2_16_la-pcre2_dfa_match.lo: src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_dfa_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Tpo -c -o src/libpcre2_16_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Tpo 
src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_dfa_match.c' object='src/libpcre2_16_la-pcre2_dfa_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c + +src/libpcre2_16_la-pcre2_error.lo: src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_error.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Tpo -c -o src/libpcre2_16_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_error.c' object='src/libpcre2_16_la-pcre2_error.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c + +src/libpcre2_16_la-pcre2_extuni.lo: src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_extuni.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Tpo -c -o src/libpcre2_16_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_extuni.c' object='src/libpcre2_16_la-pcre2_extuni.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c + +src/libpcre2_16_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' object='src/libpcre2_16_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + +src/libpcre2_16_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_16_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_compile.c' object='src/libpcre2_16_la-pcre2_jit_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c + +src/libpcre2_16_la-pcre2_maketables.lo: src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_maketables.lo -MD -MP -MF 
src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Tpo -c -o src/libpcre2_16_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_maketables.c' object='src/libpcre2_16_la-pcre2_maketables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c + +src/libpcre2_16_la-pcre2_match.lo: src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Tpo -c -o src/libpcre2_16_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match.c' object='src/libpcre2_16_la-pcre2_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) 
-c -o src/libpcre2_16_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c + +src/libpcre2_16_la-pcre2_match_data.lo: src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_match_data.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Tpo -c -o src/libpcre2_16_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match_data.c' object='src/libpcre2_16_la-pcre2_match_data.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c + +src/libpcre2_16_la-pcre2_newline.lo: src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_newline.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Tpo -c -o src/libpcre2_16_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_newline.c' object='src/libpcre2_16_la-pcre2_newline.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c + +src/libpcre2_16_la-pcre2_ord2utf.lo: src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_ord2utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Tpo -c -o src/libpcre2_16_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ord2utf.c' object='src/libpcre2_16_la-pcre2_ord2utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c + +src/libpcre2_16_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_pattern_info.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Tpo -c -o src/libpcre2_16_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_pattern_info.c' object='src/libpcre2_16_la-pcre2_pattern_info.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c + +src/libpcre2_16_la-pcre2_script_run.lo: src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Tpo -c -o src/libpcre2_16_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_16_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c + +src/libpcre2_16_la-pcre2_serialize.lo: src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Tpo -c -o src/libpcre2_16_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_serialize.c' object='src/libpcre2_16_la-pcre2_serialize.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c + +src/libpcre2_16_la-pcre2_string_utils.lo: src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_string_utils.lo -MD -MP -MF 
src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Tpo -c -o src/libpcre2_16_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_string_utils.c' object='src/libpcre2_16_la-pcre2_string_utils.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c + +src/libpcre2_16_la-pcre2_study.lo: src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_study.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Tpo -c -o src/libpcre2_16_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_study.c' object='src/libpcre2_16_la-pcre2_study.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c + +src/libpcre2_16_la-pcre2_substitute.lo: src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_substitute.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Tpo -c -o src/libpcre2_16_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substitute.c' object='src/libpcre2_16_la-pcre2_substitute.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c + +src/libpcre2_16_la-pcre2_substring.lo: src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_substring.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Tpo -c -o src/libpcre2_16_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substring.c' object='src/libpcre2_16_la-pcre2_substring.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c + +src/libpcre2_16_la-pcre2_tables.lo: src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_tables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Tpo -c -o src/libpcre2_16_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_tables.c' object='src/libpcre2_16_la-pcre2_tables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c + +src/libpcre2_16_la-pcre2_ucd.lo: src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_ucd.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Tpo -c -o src/libpcre2_16_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ucd.c' object='src/libpcre2_16_la-pcre2_ucd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c + +src/libpcre2_16_la-pcre2_valid_utf.lo: src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_valid_utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Tpo -c -o src/libpcre2_16_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_valid_utf.c' object='src/libpcre2_16_la-pcre2_valid_utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c + +src/libpcre2_16_la-pcre2_xclass.lo: src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_xclass.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Tpo -c -o src/libpcre2_16_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_xclass.c' object='src/libpcre2_16_la-pcre2_xclass.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c + +src/libpcre2_16_la-pcre2_chartables.lo: src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_16_la-pcre2_chartables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Tpo -c -o src/libpcre2_16_la-pcre2_chartables.lo `test -f 
'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Tpo src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chartables.c' object='src/libpcre2_16_la-pcre2_chartables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_16_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_16_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c + +src/libpcre2_32_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_32_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_auto_possess.c' object='src/libpcre2_32_la-pcre2_auto_possess.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o 
src/libpcre2_32_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c + +src/libpcre2_32_la-pcre2_chkdint.lo: src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_chkdint.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Tpo -c -o src/libpcre2_32_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chkdint.c' object='src/libpcre2_32_la-pcre2_chkdint.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c + +src/libpcre2_32_la-pcre2_compile.lo: src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Tpo -c -o src/libpcre2_32_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile.c' object='src/libpcre2_32_la-pcre2_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c + +src/libpcre2_32_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Tpo -c -o src/libpcre2_32_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_32_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + +src/libpcre2_32_la-pcre2_config.lo: src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo -c -o src/libpcre2_32_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_config.c' object='src/libpcre2_32_la-pcre2_config.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c + +src/libpcre2_32_la-pcre2_context.lo: src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_context.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Tpo -c -o src/libpcre2_32_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_context.c' object='src/libpcre2_32_la-pcre2_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c + +src/libpcre2_32_la-pcre2_convert.lo: src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_convert.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Tpo -c -o src/libpcre2_32_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_convert.c' object='src/libpcre2_32_la-pcre2_convert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c + +src/libpcre2_32_la-pcre2_dfa_match.lo: src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_dfa_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Tpo -c -o src/libpcre2_32_la-pcre2_dfa_match.lo `test -f 
'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_dfa_match.c' object='src/libpcre2_32_la-pcre2_dfa_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c + +src/libpcre2_32_la-pcre2_error.lo: src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_error.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Tpo -c -o src/libpcre2_32_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_error.c' object='src/libpcre2_32_la-pcre2_error.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c + 
+src/libpcre2_32_la-pcre2_extuni.lo: src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_extuni.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Tpo -c -o src/libpcre2_32_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_extuni.c' object='src/libpcre2_32_la-pcre2_extuni.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c + +src/libpcre2_32_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' 
object='src/libpcre2_32_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + +src/libpcre2_32_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_32_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_compile.c' object='src/libpcre2_32_la-pcre2_jit_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c + +src/libpcre2_32_la-pcre2_maketables.lo: src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_maketables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Tpo -c -o src/libpcre2_32_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_maketables.c' object='src/libpcre2_32_la-pcre2_maketables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c + +src/libpcre2_32_la-pcre2_match.lo: src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Tpo -c -o src/libpcre2_32_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match.c' object='src/libpcre2_32_la-pcre2_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c + +src/libpcre2_32_la-pcre2_match_data.lo: src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_match_data.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Tpo -c -o src/libpcre2_32_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match_data.c' object='src/libpcre2_32_la-pcre2_match_data.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c + +src/libpcre2_32_la-pcre2_newline.lo: src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_newline.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Tpo -c -o src/libpcre2_32_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo 
'$(srcdir)/'`src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_newline.c' object='src/libpcre2_32_la-pcre2_newline.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c + +src/libpcre2_32_la-pcre2_ord2utf.lo: src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_ord2utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Tpo -c -o src/libpcre2_32_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ord2utf.c' object='src/libpcre2_32_la-pcre2_ord2utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c + 
+src/libpcre2_32_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_pattern_info.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Tpo -c -o src/libpcre2_32_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_pattern_info.c' object='src/libpcre2_32_la-pcre2_pattern_info.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c + +src/libpcre2_32_la-pcre2_script_run.lo: src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Tpo -c -o src/libpcre2_32_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_32_la-pcre2_script_run.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c + +src/libpcre2_32_la-pcre2_serialize.lo: src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Tpo -c -o src/libpcre2_32_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_serialize.c' object='src/libpcre2_32_la-pcre2_serialize.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c + +src/libpcre2_32_la-pcre2_string_utils.lo: src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile 
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_string_utils.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Tpo -c -o src/libpcre2_32_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_string_utils.c' object='src/libpcre2_32_la-pcre2_string_utils.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c + +src/libpcre2_32_la-pcre2_study.lo: src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_study.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Tpo -c -o src/libpcre2_32_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_study.c' object='src/libpcre2_32_la-pcre2_study.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c + +src/libpcre2_32_la-pcre2_substitute.lo: src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_substitute.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Tpo -c -o src/libpcre2_32_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substitute.c' object='src/libpcre2_32_la-pcre2_substitute.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c + +src/libpcre2_32_la-pcre2_substring.lo: src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_substring.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Tpo -c -o 
src/libpcre2_32_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substring.c' object='src/libpcre2_32_la-pcre2_substring.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c + +src/libpcre2_32_la-pcre2_tables.lo: src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_tables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Tpo -c -o src/libpcre2_32_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_tables.c' object='src/libpcre2_32_la-pcre2_tables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_tables.lo `test -f 
'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c + +src/libpcre2_32_la-pcre2_ucd.lo: src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_ucd.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Tpo -c -o src/libpcre2_32_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ucd.c' object='src/libpcre2_32_la-pcre2_ucd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c + +src/libpcre2_32_la-pcre2_valid_utf.lo: src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_valid_utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Tpo -c -o src/libpcre2_32_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_valid_utf.c' object='src/libpcre2_32_la-pcre2_valid_utf.lo' 
libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c + +src/libpcre2_32_la-pcre2_xclass.lo: src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_32_la-pcre2_xclass.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Tpo -c -o src/libpcre2_32_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_xclass.c' object='src/libpcre2_32_la-pcre2_xclass.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c + +src/libpcre2_32_la-pcre2_chartables.lo: src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -MT 
src/libpcre2_32_la-pcre2_chartables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Tpo -c -o src/libpcre2_32_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Tpo src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chartables.c' object='src/libpcre2_32_la-pcre2_chartables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_32_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_32_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c + +src/libpcre2_8_la-pcre2_auto_possess.lo: src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_auto_possess.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Tpo -c -o src/libpcre2_8_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_auto_possess.c' object='src/libpcre2_8_la-pcre2_auto_possess.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_auto_possess.lo `test -f 'src/pcre2_auto_possess.c' || echo '$(srcdir)/'`src/pcre2_auto_possess.c + +src/libpcre2_8_la-pcre2_chkdint.lo: src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_chkdint.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Tpo -c -o src/libpcre2_8_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chkdint.c' object='src/libpcre2_8_la-pcre2_chkdint.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_chkdint.lo `test -f 'src/pcre2_chkdint.c' || echo '$(srcdir)/'`src/pcre2_chkdint.c + +src/libpcre2_8_la-pcre2_compile.lo: src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Tpo -c -o src/libpcre2_8_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile.c' object='src/libpcre2_8_la-pcre2_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile.lo `test -f 'src/pcre2_compile.c' || echo '$(srcdir)/'`src/pcre2_compile.c + +src/libpcre2_8_la-pcre2_compile_class.lo: src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_compile_class.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Tpo -c -o src/libpcre2_8_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_compile_class.c' object='src/libpcre2_8_la-pcre2_compile_class.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_compile_class.lo `test -f 'src/pcre2_compile_class.c' || echo '$(srcdir)/'`src/pcre2_compile_class.c + 
+src/libpcre2_8_la-pcre2_config.lo: src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_config.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo -c -o src/libpcre2_8_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_config.c' object='src/libpcre2_8_la-pcre2_config.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_config.lo `test -f 'src/pcre2_config.c' || echo '$(srcdir)/'`src/pcre2_config.c + +src/libpcre2_8_la-pcre2_context.lo: src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_context.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Tpo -c -o src/libpcre2_8_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_context.c' object='src/libpcre2_8_la-pcre2_context.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_context.lo `test -f 'src/pcre2_context.c' || echo '$(srcdir)/'`src/pcre2_context.c + +src/libpcre2_8_la-pcre2_convert.lo: src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_convert.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Tpo -c -o src/libpcre2_8_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_convert.c' object='src/libpcre2_8_la-pcre2_convert.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_convert.lo `test -f 'src/pcre2_convert.c' || echo '$(srcdir)/'`src/pcre2_convert.c + +src/libpcre2_8_la-pcre2_dfa_match.lo: src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_dfa_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Tpo -c 
-o src/libpcre2_8_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_dfa_match.c' object='src/libpcre2_8_la-pcre2_dfa_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_dfa_match.lo `test -f 'src/pcre2_dfa_match.c' || echo '$(srcdir)/'`src/pcre2_dfa_match.c + +src/libpcre2_8_la-pcre2_error.lo: src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_error.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Tpo -c -o src/libpcre2_8_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo '$(srcdir)/'`src/pcre2_error.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_error.c' object='src/libpcre2_8_la-pcre2_error.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_error.lo `test -f 'src/pcre2_error.c' || echo 
'$(srcdir)/'`src/pcre2_error.c + +src/libpcre2_8_la-pcre2_extuni.lo: src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_extuni.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Tpo -c -o src/libpcre2_8_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_extuni.c' object='src/libpcre2_8_la-pcre2_extuni.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_extuni.lo `test -f 'src/pcre2_extuni.c' || echo '$(srcdir)/'`src/pcre2_extuni.c + +src/libpcre2_8_la-pcre2_find_bracket.lo: src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_find_bracket.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_find_bracket.c' 
object='src/libpcre2_8_la-pcre2_find_bracket.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_find_bracket.lo `test -f 'src/pcre2_find_bracket.c' || echo '$(srcdir)/'`src/pcre2_find_bracket.c + +src/libpcre2_8_la-pcre2_jit_compile.lo: src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_jit_compile.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo -c -o src/libpcre2_8_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_compile.c' object='src/libpcre2_8_la-pcre2_jit_compile.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_jit_compile.lo `test -f 'src/pcre2_jit_compile.c' || echo '$(srcdir)/'`src/pcre2_jit_compile.c + +src/libpcre2_8_la-pcre2_maketables.lo: src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_maketables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Tpo -c -o src/libpcre2_8_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_maketables.c' object='src/libpcre2_8_la-pcre2_maketables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_maketables.lo `test -f 'src/pcre2_maketables.c' || echo '$(srcdir)/'`src/pcre2_maketables.c + +src/libpcre2_8_la-pcre2_match.lo: src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_match.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Tpo -c -o src/libpcre2_8_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match.c' object='src/libpcre2_8_la-pcre2_match.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_match.lo `test -f 'src/pcre2_match.c' || echo '$(srcdir)/'`src/pcre2_match.c + +src/libpcre2_8_la-pcre2_match_data.lo: src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_match_data.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Tpo -c -o src/libpcre2_8_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_match_data.c' object='src/libpcre2_8_la-pcre2_match_data.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_match_data.lo `test -f 'src/pcre2_match_data.c' || echo '$(srcdir)/'`src/pcre2_match_data.c + +src/libpcre2_8_la-pcre2_newline.lo: src/pcre2_newline.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_newline.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Tpo -c -o src/libpcre2_8_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_newline.c' object='src/libpcre2_8_la-pcre2_newline.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_newline.lo `test -f 'src/pcre2_newline.c' || echo '$(srcdir)/'`src/pcre2_newline.c + +src/libpcre2_8_la-pcre2_ord2utf.lo: src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_ord2utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Tpo -c -o src/libpcre2_8_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ord2utf.c' object='src/libpcre2_8_la-pcre2_ord2utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_ord2utf.lo `test -f 'src/pcre2_ord2utf.c' || echo '$(srcdir)/'`src/pcre2_ord2utf.c + +src/libpcre2_8_la-pcre2_pattern_info.lo: src/pcre2_pattern_info.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_pattern_info.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Tpo -c -o src/libpcre2_8_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_pattern_info.c' object='src/libpcre2_8_la-pcre2_pattern_info.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_pattern_info.lo `test -f 'src/pcre2_pattern_info.c' || echo '$(srcdir)/'`src/pcre2_pattern_info.c + +src/libpcre2_8_la-pcre2_script_run.lo: src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_script_run.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Tpo -c -o src/libpcre2_8_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_script_run.c' object='src/libpcre2_8_la-pcre2_script_run.lo' libtool=yes 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_script_run.lo `test -f 'src/pcre2_script_run.c' || echo '$(srcdir)/'`src/pcre2_script_run.c + +src/libpcre2_8_la-pcre2_serialize.lo: src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_serialize.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Tpo -c -o src/libpcre2_8_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_serialize.c' object='src/libpcre2_8_la-pcre2_serialize.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_serialize.lo `test -f 'src/pcre2_serialize.c' || echo '$(srcdir)/'`src/pcre2_serialize.c + +src/libpcre2_8_la-pcre2_string_utils.lo: src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT 
src/libpcre2_8_la-pcre2_string_utils.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Tpo -c -o src/libpcre2_8_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_string_utils.c' object='src/libpcre2_8_la-pcre2_string_utils.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_string_utils.lo `test -f 'src/pcre2_string_utils.c' || echo '$(srcdir)/'`src/pcre2_string_utils.c + +src/libpcre2_8_la-pcre2_study.lo: src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_study.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Tpo -c -o src/libpcre2_8_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_study.c' object='src/libpcre2_8_la-pcre2_study.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_study.lo `test -f 'src/pcre2_study.c' || echo '$(srcdir)/'`src/pcre2_study.c + +src/libpcre2_8_la-pcre2_substitute.lo: src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_substitute.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Tpo -c -o src/libpcre2_8_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substitute.c' object='src/libpcre2_8_la-pcre2_substitute.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_substitute.lo `test -f 'src/pcre2_substitute.c' || echo '$(srcdir)/'`src/pcre2_substitute.c + +src/libpcre2_8_la-pcre2_substring.lo: src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_substring.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Tpo -c -o src/libpcre2_8_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_substring.c' object='src/libpcre2_8_la-pcre2_substring.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_substring.lo `test -f 'src/pcre2_substring.c' || echo '$(srcdir)/'`src/pcre2_substring.c + +src/libpcre2_8_la-pcre2_tables.lo: src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_tables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Tpo -c -o src/libpcre2_8_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_tables.c' object='src/libpcre2_8_la-pcre2_tables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_tables.lo `test -f 'src/pcre2_tables.c' || echo '$(srcdir)/'`src/pcre2_tables.c + +src/libpcre2_8_la-pcre2_ucd.lo: src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_ucd.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Tpo -c -o src/libpcre2_8_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_ucd.c' object='src/libpcre2_8_la-pcre2_ucd.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_ucd.lo `test -f 'src/pcre2_ucd.c' || echo '$(srcdir)/'`src/pcre2_ucd.c + +src/libpcre2_8_la-pcre2_valid_utf.lo: src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_valid_utf.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Tpo -c -o src/libpcre2_8_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_valid_utf.c' object='src/libpcre2_8_la-pcre2_valid_utf.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_valid_utf.lo `test -f 'src/pcre2_valid_utf.c' || echo '$(srcdir)/'`src/pcre2_valid_utf.c + +src/libpcre2_8_la-pcre2_xclass.lo: src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_xclass.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Tpo -c -o src/libpcre2_8_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_xclass.c' object='src/libpcre2_8_la-pcre2_xclass.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_xclass.lo `test -f 'src/pcre2_xclass.c' || echo '$(srcdir)/'`src/pcre2_xclass.c + +src/libpcre2_8_la-pcre2_chartables.lo: src/pcre2_chartables.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_8_la-pcre2_chartables.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Tpo -c -o src/libpcre2_8_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Tpo src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_chartables.c' object='src/libpcre2_8_la-pcre2_chartables.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_8_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_8_la-pcre2_chartables.lo `test -f 'src/pcre2_chartables.c' || echo '$(srcdir)/'`src/pcre2_chartables.c + +src/libpcre2_posix_la-pcre2posix.lo: src/pcre2posix.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_posix_la_CFLAGS) $(CFLAGS) -MT src/libpcre2_posix_la-pcre2posix.lo -MD -MP -MF src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Tpo -c -o src/libpcre2_posix_la-pcre2posix.lo `test -f 'src/pcre2posix.c' || echo '$(srcdir)/'`src/pcre2posix.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Tpo src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2posix.c' object='src/libpcre2_posix_la-pcre2posix.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libpcre2_posix_la_CFLAGS) $(CFLAGS) -c -o src/libpcre2_posix_la-pcre2posix.lo `test -f 'src/pcre2posix.c' || echo '$(srcdir)/'`src/pcre2posix.c + +src/pcre2_jit_test-pcre2_jit_test.o: 
src/pcre2_jit_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -MT src/pcre2_jit_test-pcre2_jit_test.o -MD -MP -MF src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo -c -o src/pcre2_jit_test-pcre2_jit_test.o `test -f 'src/pcre2_jit_test.c' || echo '$(srcdir)/'`src/pcre2_jit_test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_test.c' object='src/pcre2_jit_test-pcre2_jit_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -c -o src/pcre2_jit_test-pcre2_jit_test.o `test -f 'src/pcre2_jit_test.c' || echo '$(srcdir)/'`src/pcre2_jit_test.c + +src/pcre2_jit_test-pcre2_jit_test.obj: src/pcre2_jit_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -MT src/pcre2_jit_test-pcre2_jit_test.obj -MD -MP -MF src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo -c -o src/pcre2_jit_test-pcre2_jit_test.obj `if test -f 'src/pcre2_jit_test.c'; then $(CYGPATH_W) 'src/pcre2_jit_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_jit_test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Tpo src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_jit_test.c' object='src/pcre2_jit_test-pcre2_jit_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(pcre2_jit_test_CFLAGS) $(CFLAGS) -c -o src/pcre2_jit_test-pcre2_jit_test.obj `if test -f 'src/pcre2_jit_test.c'; then $(CYGPATH_W) 'src/pcre2_jit_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_jit_test.c'; fi` + +src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Tpo 
src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_16_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_16-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_32_CFLAGS) 
$(CFLAGS) -MT src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_32_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_32-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.o `test -f 'src/pcre2_fuzzsupport.c' || echo '$(srcdir)/'`src/pcre2_fuzzsupport.c + +src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj: src/pcre2_fuzzsupport.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -MT src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj -MD -MP -MF src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Tpo src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2_fuzzsupport.c' object='src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2fuzzcheck_8_CFLAGS) $(CFLAGS) -c -o src/pcre2fuzzcheck_8-pcre2_fuzzsupport.obj `if test -f 'src/pcre2_fuzzsupport.c'; then $(CYGPATH_W) 'src/pcre2_fuzzsupport.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2_fuzzsupport.c'; fi` + +src/pcre2grep-pcre2grep.o: src/pcre2grep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -MT src/pcre2grep-pcre2grep.o -MD -MP -MF src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo -c -o src/pcre2grep-pcre2grep.o `test -f 'src/pcre2grep.c' || echo '$(srcdir)/'`src/pcre2grep.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo src/$(DEPDIR)/pcre2grep-pcre2grep.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2grep.c' 
object='src/pcre2grep-pcre2grep.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -c -o src/pcre2grep-pcre2grep.o `test -f 'src/pcre2grep.c' || echo '$(srcdir)/'`src/pcre2grep.c + +src/pcre2grep-pcre2grep.obj: src/pcre2grep.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -MT src/pcre2grep-pcre2grep.obj -MD -MP -MF src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo -c -o src/pcre2grep-pcre2grep.obj `if test -f 'src/pcre2grep.c'; then $(CYGPATH_W) 'src/pcre2grep.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2grep.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2grep-pcre2grep.Tpo src/$(DEPDIR)/pcre2grep-pcre2grep.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2grep.c' object='src/pcre2grep-pcre2grep.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2grep_CFLAGS) $(CFLAGS) -c -o src/pcre2grep-pcre2grep.obj `if test -f 'src/pcre2grep.c'; then $(CYGPATH_W) 'src/pcre2grep.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2grep.c'; fi` + +src/pcre2posix_test-pcre2posix_test.o: src/pcre2posix_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -MT src/pcre2posix_test-pcre2posix_test.o -MD -MP -MF src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo -c -o src/pcre2posix_test-pcre2posix_test.o `test -f 'src/pcre2posix_test.c' || echo '$(srcdir)/'`src/pcre2posix_test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo 
src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2posix_test.c' object='src/pcre2posix_test-pcre2posix_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -c -o src/pcre2posix_test-pcre2posix_test.o `test -f 'src/pcre2posix_test.c' || echo '$(srcdir)/'`src/pcre2posix_test.c + +src/pcre2posix_test-pcre2posix_test.obj: src/pcre2posix_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -MT src/pcre2posix_test-pcre2posix_test.obj -MD -MP -MF src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo -c -o src/pcre2posix_test-pcre2posix_test.obj `if test -f 'src/pcre2posix_test.c'; then $(CYGPATH_W) 'src/pcre2posix_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2posix_test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Tpo src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2posix_test.c' object='src/pcre2posix_test-pcre2posix_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2posix_test_CFLAGS) $(CFLAGS) -c -o src/pcre2posix_test-pcre2posix_test.obj `if test -f 'src/pcre2posix_test.c'; then $(CYGPATH_W) 'src/pcre2posix_test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2posix_test.c'; fi` + +src/pcre2test-pcre2test.o: src/pcre2test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -MT src/pcre2test-pcre2test.o -MD 
-MP -MF src/$(DEPDIR)/pcre2test-pcre2test.Tpo -c -o src/pcre2test-pcre2test.o `test -f 'src/pcre2test.c' || echo '$(srcdir)/'`src/pcre2test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2test-pcre2test.Tpo src/$(DEPDIR)/pcre2test-pcre2test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2test.c' object='src/pcre2test-pcre2test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -c -o src/pcre2test-pcre2test.o `test -f 'src/pcre2test.c' || echo '$(srcdir)/'`src/pcre2test.c + +src/pcre2test-pcre2test.obj: src/pcre2test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -MT src/pcre2test-pcre2test.obj -MD -MP -MF src/$(DEPDIR)/pcre2test-pcre2test.Tpo -c -o src/pcre2test-pcre2test.obj `if test -f 'src/pcre2test.c'; then $(CYGPATH_W) 'src/pcre2test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) src/$(DEPDIR)/pcre2test-pcre2test.Tpo src/$(DEPDIR)/pcre2test-pcre2test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='src/pcre2test.c' object='src/pcre2test-pcre2test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pcre2test_CFLAGS) $(CFLAGS) -c -o src/pcre2test-pcre2test.obj `if test -f 'src/pcre2test.c'; then $(CYGPATH_W) 'src/pcre2test.c'; else $(CYGPATH_W) '$(srcdir)/src/pcre2test.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + -rm -rf src/.libs src/_libs + +distclean-libtool: + -rm -f libtool config.lt +install-man1: $(dist_man_MANS) + 
@$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +install-man3: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man3dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man3dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man3dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.3[a-z]*$$/p'; \ 
+ fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man3dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man3dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man3dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man3dir)" || exit $$?; }; \ + done; } + +uninstall-man3: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man3dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.3[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man3dir)'; $(am__uninstall_files_from_dir) +install-dist_docDATA: $(dist_doc_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(docdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(docdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(docdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(docdir)" || exit $$?; \ + done + +uninstall-dist_docDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_doc_DATA)'; test -n "$(docdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(docdir)'; $(am__uninstall_files_from_dir) +install-dist_htmlDATA: 
$(dist_html_DATA) + @$(NORMAL_INSTALL) + @list='$(dist_html_DATA)'; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ + done + +uninstall-dist_htmlDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_html_DATA)'; test -n "$(htmldir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(htmldir)'; $(am__uninstall_files_from_dir) +install-pkgconfigDATA: $(pkgconfig_DATA) + @$(NORMAL_INSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \ + done + +uninstall-pkgconfigDATA: + @$(NORMAL_UNINSTALL) + @list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir) +install-includeHEADERS: $(include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read 
files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) +install-nodist_includeHEADERS: $(nodist_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ + done + +uninstall-nodist_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) 
$(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscope: cscope.files + test ! -s cscope.files \ + || $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS) +clean-cscope: + -rm -f cscope.files +cscope.files: clean-cscope cscopelist +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: $(check_SCRIPTS) + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f 
$(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_SCRIPTS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +pcre2posix_test.log: pcre2posix_test$(EXEEXT) + @p='pcre2posix_test$(EXEEXT)'; \ + b='pcre2posix_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +pcre2_jit_test.log: pcre2_jit_test$(EXEEXT) + @p='pcre2_jit_test$(EXEEXT)'; \ + b='pcre2_jit_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +RunTest.log: RunTest + @p='RunTest'; \ + b='RunTest'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +RunGrepTest.log: RunGrepTest + @p='RunGrepTest'; \ + b='RunGrepTest'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + 
$(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + $(am__remove_distdir) + test -d "$(distdir)" || mkdir "$(distdir)" + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + -test -n "$(am__skip_mode_fix)" \ + || find "$(distdir)" -type d ! -perm -755 \ + -exec chmod u+rwx,go+rx {} \; -o \ + ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ + ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ + || chmod -R a+r "$(distdir)" +dist-gzip: distdir + tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz + $(am__post_remove_distdir) +dist-bzip2: distdir + tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 + $(am__post_remove_distdir) + +dist-lzip: distdir + tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz + $(am__post_remove_distdir) + +dist-xz: distdir + tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz + $(am__post_remove_distdir) + +dist-zstd: distdir + tardir=$(distdir) && $(am__tar) | zstd -c $${ZSTD_CLEVEL-$${ZSTD_OPT--19}} >$(distdir).tar.zst + $(am__post_remove_distdir) + +dist-tarZ: distdir + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z + $(am__post_remove_distdir) + +dist-shar: distdir + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." 
>&2 + @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 + shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz + $(am__post_remove_distdir) +dist-zip: distdir + -rm -f $(distdir).zip + zip -rq $(distdir).zip $(distdir) + $(am__post_remove_distdir) + +dist dist-all: + $(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:' + $(am__post_remove_distdir) + +# This target untars the dist file and tries a VPATH configuration. Then +# it guarantees that the distribution is self-contained by making another +# tarfile. +distcheck: dist + case '$(DIST_ARCHIVES)' in \ + *.tar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\ + *.tar.bz2*) \ + bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lz*) \ + lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ + *.tar.xz*) \ + xz -dc $(distdir).tar.xz | $(am__untar) ;;\ + *.tar.Z*) \ + uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ + *.shar.gz*) \ + eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\ + *.zip*) \ + unzip $(distdir).zip ;;\ + *.tar.zst*) \ + zstd -dc $(distdir).tar.zst | $(am__untar) ;;\ + esac + chmod -R a-w $(distdir) + chmod u+w $(distdir) + mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + chmod a-w $(distdir) + test -d $(distdir)/_build || exit 0; \ + dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ + && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ + && am__cwd=`pwd` \ + && $(am__cd) $(distdir)/_build/sub \ + && ../../configure \ + $(AM_DISTCHECK_CONFIGURE_FLAGS) \ + $(DISTCHECK_CONFIGURE_FLAGS) \ + --srcdir=../.. 
--prefix="$$dc_install_base" \ + && $(MAKE) $(AM_MAKEFLAGS) \ + && $(MAKE) $(AM_MAKEFLAGS) $(AM_DISTCHECK_DVI_TARGET) \ + && $(MAKE) $(AM_MAKEFLAGS) check \ + && $(MAKE) $(AM_MAKEFLAGS) install \ + && $(MAKE) $(AM_MAKEFLAGS) installcheck \ + && $(MAKE) $(AM_MAKEFLAGS) uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ + distuninstallcheck \ + && chmod -R a-w "$$dc_install_base" \ + && ({ \ + (cd ../.. && umask 077 && mkdir "$$dc_destdir") \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ + && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ + distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ + } || { rm -rf "$$dc_destdir"; exit 1; }) \ + && rm -rf "$$dc_destdir" \ + && $(MAKE) $(AM_MAKEFLAGS) dist \ + && rm -rf $(DIST_ARCHIVES) \ + && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ + && cd "$$am__cwd" \ + || exit 1 + $(am__post_remove_distdir) + @(echo "$(distdir) archives ready for distribution: "; \ + list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ + sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' +distuninstallcheck: + @test -n '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: trying to run $@ with an empty' \ + '$$(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + $(am__cd) '$(distuninstallcheck_dir)' || { \ + echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ + exit 1; \ + }; \ + test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left after uninstall:" ; \ + if test -n "$(DESTDIR)"; then \ + echo " (check DESTDIR support)"; \ + fi ; \ + $(distuninstallcheck_listfiles) ; \ + exit 1; } >&2 +distcleancheck: distclean + @if test '$(srcdir)' = . 
; then \ + echo "ERROR: distcleancheck can only run from a VPATH build" ; \ + exit 1 ; \ + fi + @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ + || { echo "ERROR: files left in build directory after distclean:" ; \ + $(distcleancheck_listfiles) ; \ + exit 1; } >&2 +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_SCRIPTS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(PROGRAMS) $(LIBRARIES) $(LTLIBRARIES) $(SCRIPTS) \ + $(MANS) $(DATA) $(HEADERS) +install-binPROGRAMS: install-libLTLIBRARIES + +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(docdir)" "$(DESTDIR)$(htmldir)" "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f .libs/$(am__dirstamp) + -rm -f src/$(DEPDIR)/$(am__dirstamp) + -rm -f src/$(am__dirstamp) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." + -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) + -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +@WITH_GCOV_FALSE@clean-local: +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ + clean-libtool clean-local clean-noinstLIBRARIES \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo + -rm -f src/$(DEPDIR)/pcre2_dftables.Po + -rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po + -rm -f src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po + -rm -f src/$(DEPDIR)/pcre2test-pcre2test.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-local distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-dist_docDATA install-dist_htmlDATA \ + install-includeHEADERS install-man \ + install-nodist_includeHEADERS install-pkgconfigDATA + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS install-binSCRIPTS \ + install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 install-man3 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_16_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_32_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/_libs_libpcre2_fuzzsupport_a-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_compile.Plo + -rm -f 
src/$(DEPDIR)/libpcre2_16_la-pcre2_compile_class.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_16_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_compile_class.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_error.Plo + 
-rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_match_data.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_32_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_auto_possess.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chartables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_chkdint.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_compile_class.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_config.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_context.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_convert.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_dfa_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_error.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_extuni.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_find_bracket.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_jit_compile.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_maketables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_match_data.Plo + -rm 
-f src/$(DEPDIR)/libpcre2_8_la-pcre2_newline.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ord2utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_pattern_info.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_script_run.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_serialize.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_string_utils.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_study.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substitute.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_substring.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_tables.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_ucd.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_valid_utf.Plo + -rm -f src/$(DEPDIR)/libpcre2_8_la-pcre2_xclass.Plo + -rm -f src/$(DEPDIR)/libpcre2_posix_la-pcre2posix.Plo + -rm -f src/$(DEPDIR)/pcre2_dftables.Po + -rm -f src/$(DEPDIR)/pcre2_jit_test-pcre2_jit_test.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_16-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_32-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2fuzzcheck_8-pcre2_fuzzsupport.Po + -rm -f src/$(DEPDIR)/pcre2grep-pcre2grep.Po + -rm -f src/$(DEPDIR)/pcre2posix_test-pcre2posix_test.Po + -rm -f src/$(DEPDIR)/pcre2test-pcre2test.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS \ + uninstall-dist_docDATA uninstall-dist_htmlDATA \ + uninstall-includeHEADERS uninstall-libLTLIBRARIES \ + uninstall-man uninstall-nodist_includeHEADERS \ + uninstall-pkgconfigDATA + +uninstall-man: uninstall-man1 uninstall-man3 + +.MAKE: all check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles am--refresh check \ + check-TESTS check-am clean clean-binPROGRAMS clean-cscope \ + clean-generic clean-libLTLIBRARIES 
clean-libtool clean-local \ + clean-noinstLIBRARIES clean-noinstPROGRAMS cscope \ + cscopelist-am ctags ctags-am dist dist-all dist-bzip2 \ + dist-gzip dist-lzip dist-shar dist-tarZ dist-xz dist-zip \ + dist-zstd distcheck distclean distclean-compile \ + distclean-generic distclean-hdr distclean-libtool \ + distclean-local distclean-tags distcleancheck distdir \ + distuninstallcheck dvi dvi-am html html-am info info-am \ + install install-am install-binPROGRAMS install-binSCRIPTS \ + install-data install-data-am install-dist_docDATA \ + install-dist_htmlDATA install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am \ + install-includeHEADERS install-info install-info-am \ + install-libLTLIBRARIES install-man install-man1 install-man3 \ + install-nodist_includeHEADERS install-pdf install-pdf-am \ + install-pkgconfigDATA install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + recheck tags tags-am uninstall uninstall-am \ + uninstall-binPROGRAMS uninstall-binSCRIPTS \ + uninstall-dist_docDATA uninstall-dist_htmlDATA \ + uninstall-includeHEADERS uninstall-libLTLIBRARIES \ + uninstall-man uninstall-man1 uninstall-man3 \ + uninstall-nodist_includeHEADERS uninstall-pkgconfigDATA + +.PRECIOUS: Makefile + + +# The only difference between pcre2.h.in and pcre2.h is the setting of the PCRE +# version number. Therefore, we can create the generic version just by copying. + +src/pcre2.h.generic: src/pcre2.h.in configure.ac + rm -f $@ + cp -p src/pcre2.h $@ + +# It is more complicated for config.h.generic. We need the version that results +# from a default configuration so as to get all the default values for PCRE +# configuration macros such as MATCH_LIMIT and NEWLINE. We can get this by +# doing a configure in a temporary directory. 
However, some trickery is needed, +# because the source directory may already be configured. If you just try +# running configure in a new directory, it complains. For this reason, we move +# config.status out of the way while doing the default configuration. The +# resulting config.h is munged by perl to put #ifdefs round any #defines for +# macros with values, and to #undef all boolean macros such as HAVE_xxx and +# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. + +src/config.h.generic: configure.ac + rm -rf $@ _generic + mkdir _generic + cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside + cd _generic && $(abs_top_srcdir)/configure || : + cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs + test -f _generic/src/config.h + perl -n \ + -e 'BEGIN{$$blank=0;}' \ + -e 'if(/(.+?)\s*__attribute__ \(\(visibility/){print"$$1\n";$$blank=0;next;}' \ + -e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \ + -e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \ + -e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \ + -e 'if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}' \ + _generic/src/config.h >$@ + rm -rf _generic +@WITH_REBUILD_CHARTABLES_TRUE@src/pcre2_chartables.c: pcre2_dftables$(EXEEXT) +@WITH_REBUILD_CHARTABLES_TRUE@ rm -f $@ +@WITH_REBUILD_CHARTABLES_TRUE@ ./pcre2_dftables$(EXEEXT) $@ +@WITH_REBUILD_CHARTABLES_FALSE@src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist +@WITH_REBUILD_CHARTABLES_FALSE@ rm -f $@ +@WITH_REBUILD_CHARTABLES_FALSE@ $(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c + +@WITH_GCOV_TRUE@coverage-check: all +@WITH_GCOV_TRUE@ -$(MAKE) $(AM_MAKEFLAGS) -k check + +@WITH_GCOV_TRUE@coverage-baseline: +@WITH_GCOV_TRUE@ $(LCOV) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --directory $(top_builddir) \ 
+@WITH_GCOV_TRUE@ --output-file "$(COVERAGE_OUTPUT_FILE)" \ +@WITH_GCOV_TRUE@ --capture \ +@WITH_GCOV_TRUE@ --initial + +@WITH_GCOV_TRUE@coverage-report: +@WITH_GCOV_TRUE@ $(LCOV) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --directory $(top_builddir) \ +@WITH_GCOV_TRUE@ --capture \ +@WITH_GCOV_TRUE@ --output-file "$(COVERAGE_OUTPUT_FILE).tmp" \ +@WITH_GCOV_TRUE@ --test-name "$(COVERAGE_TEST_NAME)" \ +@WITH_GCOV_TRUE@ --no-checksum \ +@WITH_GCOV_TRUE@ --compat-libtool \ +@WITH_GCOV_TRUE@ $(COVERAGE_LCOV_EXTRA_FLAGS) +@WITH_GCOV_TRUE@ $(LCOV) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --directory $(top_builddir) \ +@WITH_GCOV_TRUE@ --output-file "$(COVERAGE_OUTPUT_FILE)" \ +@WITH_GCOV_TRUE@ --remove "$(COVERAGE_OUTPUT_FILE).tmp" \ +@WITH_GCOV_TRUE@ "/tmp/*" \ +@WITH_GCOV_TRUE@ "/usr/include/*" \ +@WITH_GCOV_TRUE@ "$(includedir)/*" +@WITH_GCOV_TRUE@ -@rm -f "$(COVERAGE_OUTPUT_FILE).tmp" +@WITH_GCOV_TRUE@ LANG=C $(GENHTML) $(coverage_quiet) \ +@WITH_GCOV_TRUE@ --prefix $(top_builddir) \ +@WITH_GCOV_TRUE@ --output-directory "$(COVERAGE_OUTPUT_DIR)" \ +@WITH_GCOV_TRUE@ --title "$(PACKAGE) $(VERSION) Code Coverage Report" \ +@WITH_GCOV_TRUE@ --show-details "$(COVERAGE_OUTPUT_FILE)" \ +@WITH_GCOV_TRUE@ --legend \ +@WITH_GCOV_TRUE@ $(COVERAGE_GENHTML_EXTRA_FLAGS) +@WITH_GCOV_TRUE@ @echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html" + +@WITH_GCOV_TRUE@coverage-reset: +@WITH_GCOV_TRUE@ -$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir) + +@WITH_GCOV_TRUE@coverage-clean-report: +@WITH_GCOV_TRUE@ -rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp" +@WITH_GCOV_TRUE@ -rm -rf "$(COVERAGE_OUTPUT_DIR)" + +@WITH_GCOV_TRUE@coverage-clean-data: +@WITH_GCOV_TRUE@ -find $(top_builddir) -name "*.gcda" -delete + +@WITH_GCOV_TRUE@coverage-clean: coverage-reset coverage-clean-report coverage-clean-data +@WITH_GCOV_TRUE@ -find $(top_builddir) -name "*.gcno" -delete + +@WITH_GCOV_TRUE@coverage-distclean: coverage-clean + 
+@WITH_GCOV_TRUE@coverage: coverage-reset coverage-baseline coverage-check coverage-report +@WITH_GCOV_TRUE@clean-local: coverage-clean +@WITH_GCOV_TRUE@distclean-local: coverage-distclean + +@WITH_GCOV_TRUE@.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean + +# Without coverage support, still arrange for 'make distclean' to get rid of +# any coverage files that may have been left from a different configuration. + +@WITH_GCOV_FALSE@coverage: +@WITH_GCOV_FALSE@ @echo "Configuring with --enable-coverage is required to generate code coverage report." + +@WITH_GCOV_FALSE@distclean-local: +@WITH_GCOV_FALSE@ rm -rf $(PACKAGE)-$(VERSION)-coverage* + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/3rd/pcre2/NEWS b/3rd/pcre2/NEWS new file mode 100644 index 00000000..4b5ec1e5 --- /dev/null +++ b/3rd/pcre2/NEWS @@ -0,0 +1,578 @@ +News about PCRE2 releases +------------------------- + +Version 10.45 05-February-2025 +------------------------------ + +This is a comparatively large release, incorporating new features, some +bugfixes, and a few changes with slight backwards compatibility implications. +Please see the ChangeLog and Git log for further details. + +Only changes to behaviour, changes to the API, and major changes to the pattern +syntax are described here. + +This release is the first to be available as a (signed) Git tag, or +alternatively as a (signed) tarball of the Git tag. + +This is also the first release to be made by the new maintainers of PCRE2, and +we would like to thank Philip Hazel, creator and maintainer of PCRE and PCRE2. + +* (Git change) The sljit project has been split out into a separate Git + repository. Git users must now run `git submodule init; git submodule update` + after a Git checkout. 
+ +* (Behaviour change) Update Unicode support to UCD 16. + +* (Match behaviour change) Case-insensitive matching of Unicode properties + Ll, Lt, and Lu has been changed to match Perl. Previously, /\p{Ll}/i would + match only lower-case characters (even though case-insensitive matching was + specified). This also affects case-insensitive matching of POSIX classes such + as [:lower:]. + +* (Minor match behaviour change) Case-insensitive matching of backreferences now + respects the PCRE2_EXTRA_CASELESS_RESTRICT option. + +* (Minor pattern syntax change) Parsing of the \x escape is stricter, and is + no longer parsed as an escape for the NUL character if not followed by '{' or + a hexadecimal digit. Use \x00 instead. + +* (Major new feature) Add a new feature called scan substring. This is a new + type of assertion which matches the content of a capturing block to a + sub-pattern. + + Example: to find a word that contains the rare (in English) sequence of + letters "rh" not at the start: + + \b(\w++)(*scan_substring:(1).+rh) + + The first group captures a word which is then scanned by the + (*scan_substring:(1) ... ) assertion, which tests whether the pattern ".+rh" + matches the capture group "(1)". + +* (Major new feature) Add support for UTS#18 compatible character classes, + using the new option PCRE2_ALT_EXTENDED_CLASS. This adds '[' as a + metacharacter within character classes and the operators '&&', '--' and '~~', + allowing subtractions and intersections of character classes to be easily + expressed. + + Example: to match Thai or Greek letters (but not letters or other characters + in those scripts), use [\p{L}&&[\p{Thai}||\p{Greek}]]. + +* (Major new feature) Add support for Perl-style extended character classes, + using the syntax (?[...]). This also allows expressing subtractions and + intersections of character classes, but using a different syntax to UTS#18. 
+ + Example: to match Thai or Greek letters (but not letters or other characters + in those scripts), use (?[\p{L} & (\p{Thai} + \p{Greek})]). + +* (Minor feature) Significant improvements to the character class match engine. + Compiled character classes are now more compact, and have faster matching + for large or complex character sets, using binary search through the set. + +* JIT compilation now fails with the new error code PCRE2_ERROR_JIT_UNSUPPORTED + for patterns which use features not supported by the JIT compiler. + +* (Minor feature) New options PCRE2_EXTRA_NO_BS0 (disallow \0 as an escape for + the NUL character); PCRE2_EXTRA_PYTHON_OCTAL (use Python disambiguation rules + for deciding whether \12 is a backreference or an octal escape); + PCRE2_EXTRA_NEVER_CALLOUT (disable callout syntax entirely); + PCRE2_EXTRA_TURKISH_CASING (use Turkish rules for case-insensitive matching). + +* (Minor feature) Add new API function pcre2_set_optimize() for controlling + which optimizations are enabled. + +* (Minor new features) A variety of extensions have been made to + pcre2_substitute() and its syntax for replacement strings. These now support: + \123 octal escapes; titlecasing \u\L; \1 backreferences; \g<1> and $<NAME> + backreferences; $& $` $' and $_; new function + pcre2_set_substitute_case_callout() to allow locale-aware case transformation. + + +Version 10.44 07-June-2024 +-------------------------- + +This is mostly a bug-fix and tidying release. There is one new function, to set +a maximum size for a compiled pattern. The maximum name length for groups is +increased to 128. Some auxiliary files for building under VMS are added. + + +Version 10.43 16-February-2024 +------------------------------ + +There are quite a lot of changes in this release (see ChangeLog and Git log for +a list). Those that are not bugfixes or code tidies are: + +* The JIT code no longer supports ARMv5 architecture.
+ +* A new function pcre2_get_match_data_heapframes_size() for finer heap control. + +* New option flags to restrict the interaction between ASCII and non-ASCII + characters for caseless matching and \d and friends. There are also new + pattern constructs to control these flags from within a pattern. + +* Upgrade to Unicode 15.0.0. + +* Treat a NULL pattern with zero length as an empty string. + +* Added support for limited-length variable-length lookbehind assertions, with + a default maximum length of 255 characters (same as Perl) but with a function + to adjust the limit. + +* Support for LoongArch in JIT. + +* Perl changed the meaning of (for example) {,3} which was not previously + recognized as a quantifier. Now it means {0,3} and PCRE2 has also changed. + Note that {,} is still not a quantifier. + +* Following Perl, allow spaces and tabs after { and before } in all Perl- + compatible items that use braces, and also around commas in quantifiers. The + one exception in PCRE2 is \u{...}, which is from ECMAScript, not Perl, and + PCRE2 follows ECMAScript usage. + +* Changed the meaning of \w and its synonyms and derivatives (\b and \B) in UCP + mode to follow Perl. It now matches characters whose general categories are L + or N or whose particular categories are Mn (non-spacing mark) or Pc + (connector punctuation). + +* Changed the default meaning of [:xdigit:] in UCP mode to follow Perl. It now + matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can + be used to keep it ASCII only. + +* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp, + --case-restrict and --posix-digit. + +* Add --group-separator and --no-group-separator to pcre2grep. + + +Version 10.42 11-December-2022 +------------------------------ + +This is an unexpectedly early release to fix a problem that was introduced in +10.41.
ChangeLog number 19 (GitHub #139) added the default definition of +PCRE2_CALL_CONVENTION to pcre2posix.c instead of pcre2posix.h, which meant that +programs including pcre2posix.h but not pcre2.h couldn't compile. A new test +that checks this case has been added. + +A couple of other minor issues are also fixed, and a patch for an intermittent +JIT fault is also included. See ChangeLog and the Git log. + + +Version 10.41 06-December-2022 +------------------------------ + +This is another mainly bug-fixing and code-tidying release. There is one +significant upgrade to pcre2grep: it now behaves like GNU grep when matching +more than one pattern and a later pattern matches at an earlier point in the +subject when the matched substrings are being identified by colour or by +offsets. + + +Version 10.40 15-April-2022 +--------------------------- + +This is mostly a bug-fixing and code-tidying release. However, there are some +extensions to Unicode property handling: + +* Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +* A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +As always, see ChangeLog for a list of all changes (also the Git log). 
+ + +Version 10.39 29-October-2021 +----------------------------- + +This release is happening soon after 10.38 because the bug fix is important. + +1. Fix incorrect detection of alternatives in first character search in JIT. + +2. Update to Unicode 14.0.0. + +3. Some code cleanups (see ChangeLog). + + +Version 10.38 01-October-2021 +----------------------------- + +As well as some bug fixes and tidies (as always, see ChangeLog for details), +the documentation is updated to list the new URLs, following the move of the +source repository to GitHub and the mailing list to Google Groups. + +* The CMake build system can now build both static and shared libraries in one +go. + +* Following Perl's lead, \K is now locked out in lookaround assertions by +default, but an option is provided to re-enable the previous behaviour. + + +Version 10.37 26-May-2021 +------------------------- + +A few more bug fixes and tidies. The only change of real note is the removal of +the actual POSIX names regcomp etc. from the POSIX wrapper library because +these have caused issues for some applications (see 10.33 #2 below). + + +Version 10.36 04-December-2020 +------------------------------ + +Again, mainly bug fixes and tidies. The only enhancements are the addition of +GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the +handling of substitution strings for both -O and callouts in pcre2grep, with +the addition of $x{...} and $o{...} to allow for characters whose code points +are greater than 255 in Unicode mode. + +NOTE: there is an outstanding issue with JIT support for MacOS on arm64 +hardware. For details, please see Bugzilla issue #2618. + + +Version 10.35 15-April-2020 +--------------------------- + +Bugfixes, tidies, and a few new enhancements. + +1. Capturing groups that contain recursive backreferences to themselves are no +longer automatically atomic, because the restriction is no longer necessary +as a result of the 10.30 restructuring. + +2. 
Several new options for pcre2_substitute(). + +3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode +character properties are used for upper/lower case computations on characters +whose code points are greater than 127. + +4. The character tables (for low-valued characters) can now more easily be +saved and restored in binary. + +5. Updated to Unicode 13.0.0. + + +Version 10.34 21-November-2019 +------------------------------ + +Another release with a few enhancements as well as bugfixes and tidies. The +main new features are: + +1. There is now some support for matching in invalid UTF strings. + +2. Non-atomic positive lookarounds are implemented in the pcre2_match() +interpreter, but not in JIT. + +3. Added two new functions: pcre2_get_match_data_size() and +pcre2_maketables_free(). + +4. Upgraded to Unicode 12.1.0. + + +Version 10.33 16-April-2019 +--------------------------- + +Yet more bugfixes, tidies, and a few enhancements, summarized here (see +ChangeLog for the full list): + +1. Callouts from pcre2_substitute() are now available. + +2. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper +functions that use the standard POSIX names. However, in pcre2posix.h the POSIX +names are defined as macros. This should help avoid linking with the wrong +library in some environments, while still exporting the POSIX names for +pre-existing programs that use them. + +3. Some new options: + + (a) PCRE2_EXTRA_ESCAPED_CR_IS_LF makes \r behave as \n. + + (b) PCRE2_EXTRA_ALT_BSUX enables support for ECMAScript 6's \u{hh...} + construct. + + (c) PCRE2_COPY_MATCHED_SUBJECT causes a copy of a matched subject to be + made, instead of just remembering a pointer. + +4. Some new Perl features: + + (a) Perl 5.28's experimental alphabetic names for atomic groups and + lookaround assertions, for example, (*pla:...) and (*atomic:...). + + (b) The new Perl "script run" features (*script_run:...) and + (*atomic_script_run:...) aka (*sr:...) 
and (*asr:...). + + (c) When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in + capture group names. + +5. --disable-percent-zt disables the use of %zu and %td in formatting strings +in pcre2test. They were already automatically disabled for VC and older C +compilers. + +6. Some changes related to callouts in pcre2grep: + + (a) Support for running an external program under VMS has been added, in + addition to Windows and fork() support. + + (b) --disable-pcre2grep-callout-fork restricts the callout support in + pcre2grep to the inbuilt echo facility. + + +Version 10.32 10-September-2018 +------------------------------- + +This is another mainly bugfix and tidying release with a few minor +enhancements. These are the main ones: + +1. pcre2grep now supports the inclusion of binary zeros in patterns that are +read from files via the -f option. + +2. ./configure now supports --enable-jit=auto, which automatically enables JIT +if the hardware supports it. + +3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for +local workspace and local ovectors. Instead, an initial block of stack is +reserved, but if this is insufficient, heap memory is used. The heap limit +parameter now applies to pcre2_dfa_match(). + +4. Updated to Unicode version 11.0.0. + +5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported. + +6. Added support for \N{U+dddd}, but only in Unicode mode. + +7. Added support for (?^) to unset all imnsx options. + + +Version 10.31 12-February-2018 +------------------------------ + +This is mainly a bugfix and tidying release (see ChangeLog for full details). +However, there are some minor enhancements. + +1. New pcre2_config() options: PCRE2_CONFIG_NEVER_BACKSLASH_C and +PCRE2_CONFIG_COMPILED_WIDTHS. + +2. New pcre2_pattern_info() option PCRE2_INFO_EXTRAOPTIONS to retrieve the +extra compile time options. + +3. There are now public names for all the pcre2_compile() error numbers. + +4.
Added PCRE2_CALLOUT_STARTMATCH and PCRE2_CALLOUT_BACKTRACK bits to a new +field callout_flags in callout blocks. + + +Version 10.30 14-August-2017 +---------------------------- + +The full list of changes that includes bugfixes and tidies is, as always, in +ChangeLog. These are the most important new features: + +1. The main interpreter, pcre2_match(), has been refactored into a new version +that does not use recursive function calls (and therefore the system stack) for +remembering backtracking positions. This makes --disable-stack-for-recursion a +NOOP. The new implementation allows backtracking into recursive group calls in +patterns, making it more compatible with Perl, and also fixes some other +previously hard-to-do issues. For patterns that have a lot of backtracking, the +heap is now used, and there is an explicit limit on the amount, settable by +pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). The "recursion limit" is retained, +but is renamed as "depth limit" (though the old names remain for +compatibility). + +There is also a change in the way callouts from pcre2_match() are handled. The +offset_vector field in the callout block is no longer a pointer to the +actual ovector that was passed to the matching function in the match data +block. Instead it points to an internal ovector of a size large enough to hold +all possible captured substrings in the pattern. + +2. The new option PCRE2_ENDANCHORED insists that a pattern match must end at +the end of the subject. + +3. The new option PCRE2_EXTENDED_MORE implements Perl's /xx feature, and +pcre2test is upgraded to support it. Setting within the pattern by (?xx) is +also supported. + +4. (?n) can be used to set PCRE2_NO_AUTO_CAPTURE, because Perl now has this. + +5. Additional compile options in the compile context are now available, and the +first two are: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES and +PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL. + +6. The newline type PCRE2_NEWLINE_NUL is now available. + +7. 
The match limit value now also applies to pcre2_dfa_match() as there are +patterns that can use up a lot of resources without necessarily recursing very +deeply. + +8. The option REG_PEND (a GNU extension) is now available for the POSIX +wrapper. Also there is a new option PCRE2_LITERAL which is used to support +REG_NOSPEC. + +9. PCRE2_EXTRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are implemented for the +benefit of pcre2grep, and pcre2grep's -F, -w, and -x options are re-implemented +using PCRE2_LITERAL, PCRE2_EXTRA_MATCH_WORD, and PCRE2_EXTRA_MATCH_LINE. This +is tidier and also fixes some bugs. + +10. The Unicode tables are upgraded from Unicode 8.0.0 to Unicode 10.0.0. + +11. There are some experimental functions for converting foreign patterns +(globs and POSIX patterns) into PCRE2 patterns. + + +Version 10.23 14-February-2017 +------------------------------ + +1. ChangeLog has the details of a lot of bug fixes and tidies. + +2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax +checking is now done in the pre-pass that identifies capturing groups. This has +reduced the amount of duplication and made the code tidier. While doing this, +some minor bugs and Perl incompatibilities were fixed (see ChangeLog for +details.) + +3. Back references are now permitted in lookbehind assertions when there are +no duplicated group numbers (that is, (?| has not been used), and, if the +reference is by name, there is only one group of that name. The referenced +group must, of course, be of fixed length. + +4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back +reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does +not recognize this syntax. + +5. pcre2grep now automatically expands its buffer up to a maximum set by +--max-buffer-size. + +6. The -t option (grand total) has been added to pcre2grep. + +7.
A new function called pcre2_code_copy_with_tables() exists to copy a +compiled pattern along with a private copy of the character tables that it +uses. + +8. A user supplied a number of patches to upgrade pcre2grep under Windows and +tidy the code. + +9. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.22 29-July-2016 +-------------------------- + +1. ChangeLog has the details of a number of bug fixes. + +2. The POSIX wrapper function regcomp() did not previously support back references +and subroutine calls if called with the REG_NOSUB option. It now does. + +3. A new function, pcre2_code_copy(), is added, to make a copy of a compiled +pattern. + +4. Support for string callouts is added to pcre2grep. + +5. Added the PCRE2_NO_JIT option to pcre2_match(). + +6. The pcre2_get_error_message() function now returns with a negative error +code if the error number it is given is unknown. + +7. Several updates have been made to pcre2test and test scripts (see +ChangeLog). + + +Version 10.21 12-January-2016 +----------------------------- + +1. Many bugs have been fixed. A large number of them were provoked only by very +strange pattern input, and were discovered by fuzzers. Some others were +discovered by code auditing. See ChangeLog for details. + +2. The Unicode tables have been updated to Unicode version 8.0.0. + +3. For Perl compatibility in EBCDIC environments, ranges such as a-z in a +class, where both values are literal letters in the same case, omit the +non-letter EBCDIC code points within the range. + +4. There have been a number of enhancements to the pcre2_substitute() function, +giving more flexibility to replacement facilities. It is now also possible to +cause the function to return the needed buffer size if the one given is too +small. + +5. The PCRE2_ALT_VERBNAMES option causes the "name" parts of special verbs such +as (*THEN:name) to be processed for backslashes and to take note of +PCRE2_EXTENDED. + +6.
PCRE2_INFO_HASBACKSLASHC makes it possible for a client to find out if a +pattern uses \C, and --never-backslash-C makes it possible to compile a version +of PCRE2 in which the use of \C is always forbidden. + +7. A limit to the length of pattern that can be handled can now be set by +calling pcre2_set_max_pattern_length(). + +8. When matching an unanchored pattern, a match can be required to begin within +a given number of code units after the start of the subject by calling +pcre2_set_offset_limit(). + +9. The pcre2test program has been extended to test new facilities, and it can +now run the tests when LF on its own is not a valid newline sequence. + +10. The RunTest script has also been updated to enable more tests to be run. + +11. There have been some minor performance enhancements. + + +Version 10.20 30-June-2015 +-------------------------- + +1. Callouts with string arguments and the pcre2_callout_enumerate() function +have been implemented. + +2. The PCRE2_NEVER_BACKSLASH_C option, which locks out the use of \C, is added. + +3. The PCRE2_ALT_CIRCUMFLEX option lets ^ match after a newline at the end of a +subject in multiline mode. + +4. The way named subpatterns are handled has been refactored. The previous +approach had several bugs. + +5. The handling of \c in EBCDIC environments has been changed to conform to the +perlebcdic document. This is an incompatible change. + +6. Bugs have been mended, many of them discovered by fuzzers. + + +Version 10.10 06-March-2015 +--------------------------- + +1. Serialization and de-serialization functions have been added to the API, +making it possible to save and restore sets of compiled patterns, though +restoration must be done in the same environment that was used for compilation. + +2. The (*NO_JIT) feature has been added; this makes it possible for a pattern +creator to specify that JIT is not to be used. + +3. A number of bugs have been fixed.
In particular, bugs that caused building +on Windows using CMake to fail have been mended. + + +Version 10.00 05-January-2015 +----------------------------- + +Version 10.00 is the first release of PCRE2, a revised API for the PCRE +library. Changes prior to 10.00 are logged in the ChangeLog file for the old +API, up to item 20 for release 8.36. New programs are recommended to use the +new library. Programs that use the original (PCRE1) API will need changing +before linking with the new library. + +**** diff --git a/3rd/pcre2/NON-AUTOTOOLS-BUILD b/3rd/pcre2/NON-AUTOTOOLS-BUILD new file mode 100644 index 00000000..bb687f7d --- /dev/null +++ b/3rd/pcre2/NON-AUTOTOOLS-BUILD @@ -0,0 +1,442 @@ +Building PCRE2 without using autotools +-------------------------------------- + +This document contains the following sections: + + General + Generic instructions for the PCRE2 C libraries + Stack size in Windows environments + Linking programs in Windows environments + Calling conventions in Windows environments + Comments about Win32 builds + Building PCRE2 on Windows with CMake + Building PCRE2 on Windows with Visual Studio + Testing with RunTest.bat + Building PCRE2 on native z/OS and z/VM + Building PCRE2 under VMS + + +GENERAL + +The source of the PCRE2 libraries consists entirely of code written in Standard +C, and so should compile successfully on any system that has a Standard C +compiler and library. + +The PCRE2 distribution includes a "configure" file for use by the +configure/make (autotools) build system, as found in many Unix-like +environments. The README file contains information about the options for +"configure". + +There is also support for CMake, which some users prefer, especially in Windows +environments, though it can also be run in Unix-like environments. See the +section entitled "Building PCRE2 on Windows with CMake" below. 
+ +Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs +under the names src/config.h.generic and src/pcre2.h.generic. These are +provided for those who build PCRE2 without using "configure" or CMake. If you +use "configure" or CMake, the .generic versions are not used. + + +GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES + +There are three possible PCRE2 libraries, each handling data with a specific +code unit width: 8, 16, or 32 bits. You can build any combination of them. The +following are generic instructions for building a PCRE2 C library "by hand". If +you are going to use CMake, this section does not apply to you; you can skip +ahead to the CMake section. Note that the settings concerned with 8-bit, +16-bit, and 32-bit code units relate to the type of data string that PCRE2 +processes. They are NOT referring to the underlying operating system bit width. +You do not have to do anything special to compile in a 64-bit environment, for +example. + + (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the + macro settings that it contains to whatever is appropriate for your + environment. In particular, you can alter the definition of the NEWLINE + macro to specify what character(s) you want to be interpreted as line + terminators by default. You need to #define at least one of + SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which + libraries you are going to build. You must set all that apply. + + When you subsequently compile any of the PCRE2 modules, you must specify + -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the + sources. + + An alternative approach is not to edit src/config.h, but to use -D on the + compiler command line to make any changes that you need to the + configuration options. In this case -DHAVE_CONFIG_H must not be set. 
+ + NOTE: There have been occasions when the way in which certain parameters + in src/config.h are used has changed between releases. (In the + configure/make world, this is handled automatically.) When upgrading to a + new release, you are strongly advised to review src/config.h.generic + before re-using what you had previously. + + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_DLFCN_H). + + (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. + + (3) EITHER: + Copy or rename file src/pcre2_chartables.c.dist as + src/pcre2_chartables.c. + + OR: + Compile src/pcre2_dftables.c as a stand-alone program (using + -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with + the single argument "src/pcre2_chartables.c". This generates a set of + standard character tables and writes them to that file. The tables are + generated using the default C locale for your system. If you want to use + a locale that is specified by LC_xxx environment variables, add the -L + option to the pcre2_dftables command. You must use this method if you + are building on a system that uses EBCDIC code. + + The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can + specify alternative tables at run time. + + (4) For a library that supports 8-bit code units in the character strings that + it processes, compile the following source files from the src directory, + setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set + -DHAVE_CONFIG_H if you have set up src/config.h with your configuration, + or else use other -D settings to change the configuration as required. 
+ + pcre2_auto_possess.c + pcre2_chkdint.c + pcre2_chartables.c + pcre2_compile.c + pcre2_compile_class.c + pcre2_config.c + pcre2_context.c + pcre2_convert.c + pcre2_dfa_match.c + pcre2_error.c + pcre2_extuni.c + pcre2_find_bracket.c + pcre2_jit_compile.c + pcre2_maketables.c + pcre2_match.c + pcre2_match_data.c + pcre2_newline.c + pcre2_ord2utf.c + pcre2_pattern_info.c + pcre2_script_run.c + pcre2_serialize.c + pcre2_string_utils.c + pcre2_study.c + pcre2_substitute.c + pcre2_substring.c + pcre2_tables.c + pcre2_ucd.c + pcre2_valid_utf.c + pcre2_xclass.c + + Make sure that you include -I. in the compiler command (or equivalent for + an unusual compiler) so that all included PCRE2 header files are first + sought in the src directory under the current directory. Otherwise you run + the risk of picking up a previously-installed file from somewhere else. + + Note that you must compile pcre2_jit_compile.c, even if you have not + defined SUPPORT_JIT in src/config.h, because when JIT support is not + configured, dummy functions are compiled. When JIT support IS configured, + pcre2_jit_compile.c #includes other files from the sljit dependency, + all of whose names begin with "sljit". It also #includes + src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile + those yourself. + + Note also that the pcre2_fuzzsupport.c file contains special code that is + useful to those who want to run fuzzing tests on the PCRE2 library. Unless + you are doing that, you can ignore it. + + (5) Now link all the compiled code into an object library in whichever form + your system keeps such libraries. This is the PCRE2 C 8-bit library, + typically called something like libpcre2-8. If your system has static and + shared libraries, you may have to do this once for each type. + + (6) If you want to build a library that supports 16-bit or 32-bit code units, + set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4 + above. 
If you want to build more than one PCRE2 library, repeat steps 4 + and 5 as necessary. + + (7) If you want to build the POSIX wrapper functions (which apply only to the + 8-bit library), ensure that you have the src/pcre2posix.h file and then + compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix + library. If targeting a DLL in Windows, make sure to include + -DPCRE2POSIX_SHARED with your compiler flags. + + (8) The pcre2test program can be linked with any combination of the 8-bit, + 16-bit and 32-bit libraries (depending on what you specified in + src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if + necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the + appropriate library/ies. If you compiled an 8-bit library, pcre2test also + needs the pcre2posix wrapper library. + + (9) Run pcre2test on the testinput files in the testdata directory, and check + that the output matches the corresponding testoutput files. There are + comments about what each test does in the section entitled "Testing PCRE2" + in the README file. If you compiled more than one of the 8-bit, 16-bit and + 32-bit libraries, you need to run pcre2test with the -16 option to do + 16-bit tests and with the -32 option to do 32-bit tests. + + Some tests are relevant only when certain build-time options are selected. + For example, test 4 is for Unicode support, and will not run if you have + built PCRE2 without it. See the comments at the start of each testinput + file. If you have a suitable Unix-like shell, the RunTest script will run + the appropriate tests for you. The command "RunTest list" will output a + list of all the tests. + + Note that the supplied files are in Unix format, with just LF characters + as line terminators. You may need to edit them to change this if your + system uses a different convention. + +(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested + by running pcre2test with the -jit option. 
This is done automatically by + the RunTest script. You might also like to build and run the freestanding + JIT test program, src/pcre2_jit_test.c. + +(11) The pcre2test program tests the POSIX wrapper library, but there is also a + freestanding test program in src/pcre2posix_test.c. It must be linked with + both the pcre2posix library and the 8-bit PCRE2 library. + +(12) If you want to use the pcre2grep command, compile and link + src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need + the pcre2posix library). If you have built the PCRE2 library with JIT + support by defining SUPPORT_JIT in src/config.h, you can also define + SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless + it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without + defining SUPPORT_JIT, pcre2grep does not try to make use of JIT. + + +STACK SIZE IN WINDOWS ENVIRONMENTS + +Prior to release 10.30 the default system stack size of 1MiB in some Windows +environments caused issues with some tests. This should no longer be the case +for 10.30 and later releases. + + +LINKING PROGRAMS IN WINDOWS ENVIRONMENTS + +If you want to statically link a program against a PCRE2 library in the form of +a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h. + + +CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS + +It is possible to compile programs to use different calling conventions using +MSVC. Search the web for "calling conventions" for more information. To make it +easier to change the calling convention for the exported functions in a +PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external +definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is +not set, it defaults to empty; the default calling convention is then used +(which is what is wanted most of the time). 
+ + +COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE") + +There are two ways of building PCRE2 using the "configure, make, make install" +paradigm on Windows systems: using MinGW or using Cygwin. These are not at all +the same thing; they are completely different from each other. There is also +support for building using CMake, which some users find a more straightforward +way of building PCRE2 under Windows. + +The MinGW home page (http://www.mingw.org/) says this: + + MinGW: A collection of freely available and freely distributable Windows + specific header files and import libraries combined with GNU toolsets that + allow one to produce native Windows programs that do not rely on any + 3rd-party C runtime DLLs. + +The Cygwin home page (http://www.cygwin.com/) says this: + + Cygwin is a Linux-like environment for Windows. It consists of two parts: + + . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing + substantial Linux API functionality + + . A collection of tools which provide Linux look and feel. + +On both MinGW and Cygwin, PCRE2 should build correctly using: + + ./configure && make && make install + +This should create two libraries called libpcre2-8 and libpcre2-posix. These +are independent libraries: when you link with libpcre2-posix you must also link +with libpcre2-8, which contains the basic functions. + +Using Cygwin's compiler generates libraries and executables that depend on +cygwin1.dll. If a library that is generated this way is distributed, +cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL +licence, this forces not only PCRE2 to be under the GPL, but also the entire +application. A distributor who wants to keep their own code proprietary must +purchase an appropriate Cygwin licence. + +MinGW has no such restrictions. The MinGW compiler generates a library or +executable that can run standalone on Windows without any third party dll or +licensing issues. 
+ +But there is more complication: + +If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is +to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a +front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's +gcc and MinGW's gcc). So, a user can: + +. Build native binaries by using MinGW or by getting Cygwin and using + -mno-cygwin. + +. Build binaries that depend on cygwin1.dll by using Cygwin with the normal + compiler flags. + +The test files that are supplied with PCRE2 are in UNIX format, with LF +characters as line terminators. Unless your PCRE2 library uses a default +newline option that includes LF as a valid newline, it may be necessary to +change the line terminators in the test files to get some of the tests to work. + + +BUILDING PCRE2 ON WINDOWS WITH CMAKE + +CMake is an alternative configuration facility that can be used instead of +"configure". CMake creates project files (make files, solution files, etc.) +tailored to numerous development environments, including Visual Studio, +Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no +spaces in the names for your CMake installation and your PCRE2 source and build +directories. + +If you are using CMake and encounter errors, deleting the CMake cache and +restarting from a fresh build may fix the error. In the CMake GUI, the cache can +be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can +be deleted. + +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. + +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. + +3. Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. + +4. Run CMake. 
+ + - Using the CLI, simply run `cmake ..` inside the `build/` directory. You can + use the `ccmake` ncurses GUI to select and configure PCRE2 features. + + - Using the CMake GUI: + + a) Run cmake-gui from the Shell environment of your build tool, for + example, Msys for Msys/MinGW or Visual Studio Command Prompt for + VC/VC++. + + b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. + + c) Press the "Configure" button. + + d) Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) + + e) The GUI will then list several configuration options. This is where + you can disable Unicode support or select other PCRE2 optional features. + + f) Press "Configure" again. The adjacent "Generate" button should now be + active. + + g) Press "Generate". + +5. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + + Regardless of build system used, `cmake --build .` will build it. + +6. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + Regardless of build system used, `ctest` will run the tests. 
+ + +BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO + +The code currently cannot be compiled without an inttypes.h header, which is +available only with Visual Studio 2013 or newer. However, this portable and +permissively-licensed implementation of the stdint.h header could be used as an +alternative: + + http://www.azillionmonkeys.com/qed/pstdint.h + +Just rename it and drop it into the top level of the build tree. + + +TESTING WITH RUNTEST.BAT + +If configured with CMake, building the test project ("make test" or building +ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending +on your configuration options, possibly other test programs) in the build +directory. The pcre2_test.bat script runs RunTest.bat with correct source and +exe paths. + +For manual testing with RunTest.bat, provided the build dir is a subdirectory +of the source directory: Open a command shell window. Chdir to the location +of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with +"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate. + +To run only a particular test with RunTest.Bat provide a test number argument. + +Otherwise: + +1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe + have been created. + +2. Edit RunTest.bat to identify the full or relative location of + the pcre2 source (in which the testdata folder resides), e.g.: + + set srcdir=C:\pcre2\pcre2-10.00 + +3. In a Windows command environment, chdir to the location of your bat and + exe programs. + +4. Run RunTest.bat. Test outputs will automatically be compared to expected + results, and discrepancies will be identified in the console output. + +To independently test the just-in-time compiler, run pcre2_jit_test.exe. + + +BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM + +z/OS and z/VM are operating systems for mainframe computers, produced by IBM. +The character code used is EBCDIC, not ASCII or Unicode. 
In z/OS, UNIX APIs and +applications can be supported through UNIX System Services, and in such an +environment it should be possible to build PCRE2 in the same way as in other +systems, with the EBCDIC related configuration settings, but it is not known if +anybody has tried this. + +In native z/OS (without UNIX System Services) and in z/VM, special ports are +required. For details, please see file 939 on this web site: + + http://www.cbttape.org + +Everything in that location, source and executable, is in EBCDIC and native +z/OS file formats. The port provides an API for LE languages such as COBOL and +for the z/OS and z/VM versions of the Rexx languages. + + +BUILDING PCRE2 UNDER VMS + +Alexey Chupahin has contributed some auxiliary files for building PCRE2 under +OpenVMS. They are in the "vms" directory in the distribution tarball. Please +read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep +programs contain some VMS-specific code. + +============================== +Last updated: 26 December 2024 +============================== + diff --git a/3rd/pcre2/README b/3rd/pcre2/README new file mode 100644 index 00000000..5a50f7f1 --- /dev/null +++ b/3rd/pcre2/README @@ -0,0 +1,970 @@ +README file for PCRE2 (Perl-compatible regular expression library) +------------------------------------------------------------------ + +PCRE2 is a re-working of the original PCRE1 library to provide an entirely new +API. Since its initial release in 2015, there has been further development of +the code and it now differs from PCRE1 in more than just the API. There are new +features, and the internals have been improved. The original PCRE1 library is +now obsolete and no longer maintained. The latest release of PCRE2 is available +in .tar.gz, .tar.bz2, or .zip form from this GitHub repository: + +https://github.com/PCRE2Project/pcre2/releases + +There is a mailing list for discussion about the development of PCRE2 at +pcre2-dev@googlegroups.com. 
You can subscribe by sending an email to +pcre2-dev+subscribe@googlegroups.com. + +You can access the archives and also subscribe or manage your subscription +here: + +https://groups.google.com/g/pcre2-dev + +Please read the NEWS file if you are upgrading from a previous release. The +contents of this README file are: + + The PCRE2 APIs + Documentation for PCRE2 + Building PCRE2 on non-Unix-like systems + Building PCRE2 without using autotools + Building PCRE2 using autotools + Retrieving configuration information + Shared libraries + Cross-compiling using autotools + Making new tarballs + Testing PCRE2 + Character tables + File manifest + + +The PCRE2 APIs +-------------- + +PCRE2 is written in C, and it has its own API. There are three sets of +functions, one for the 8-bit library, which processes strings of bytes, one for +the 16-bit library, which processes strings of 16-bit values, and one for the +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. + +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. + +The header file for the POSIX-style functions is called pcre2posix.h. The +official POSIX name is regex.h, but I did not want to risk possible problems +with existing files of that name by distributing it that way. To use PCRE2 with +an existing program that uses the POSIX API, pcre2posix.h will have to be +renamed or pointed at by a link (or the program modified, of course). See the +pcre2posix documentation for more details. 
+ + +Documentation for PCRE2 +----------------------- + +If you install PCRE2 in the normal way on a Unix-like system, you will end up +with a set of man pages whose names all start with "pcre2". The one that is +just called "pcre2" lists all the others. In addition to these man pages, the +PCRE2 documentation is supplied in two other forms: + + 1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and + doc/pcre2test.txt in the source distribution. The first of these is a + concatenation of the text forms of all the section 3 man pages except the + listing of pcre2demo.c and those that summarize individual functions. The + other two are the text forms of the section 1 man pages for the pcre2grep + and pcre2test commands. These text forms are provided for ease of scanning + with text editors or similar tools. They are installed in + <prefix>/share/doc/pcre2, where <prefix> is the installation prefix + (defaulting to /usr/local). + + 2. A set of files containing all the documentation in HTML form, hyperlinked + in various ways, and rooted in a file called index.html, is distributed in + doc/html and installed in <prefix>/share/doc/pcre2/html. + + +Building PCRE2 on non-Unix-like systems +--------------------------------------- + +For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if +your system supports the use of "configure" and "make" you may be able to build +PCRE2 using autotools in the same way as for many Unix-like systems. + +PCRE2 can also be configured using CMake, which can be run in various ways +(command line, GUI, etc). This creates Makefiles, solution files, etc. The file +NON-AUTOTOOLS-BUILD has information about CMake. + +PCRE2 has been compiled on many different operating systems. It should be +straightforward to build PCRE2 on any system that has a Standard C compiler and +library, because it uses only Standard C functions. 
+ + +Building PCRE2 without using autotools +-------------------------------------- + +The use of autotools (in particular, libtool) is problematic in some +environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD +file for ways of building PCRE2 without using autotools. + + +Building PCRE2 using autotools +------------------------------ + +The following instructions assume the use of the widely used "configure; make; +make install" (autotools) process. + +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set +to the directory where you want the files to be created. This command is a +standard GNU "autoconf" configuration script, for which generic instructions +are supplied in the file INSTALL. + +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + +Most commonly, people build PCRE2 within its own distribution directory, and in +this case, on many systems, just running "./configure" is sufficient. However, +the usual methods of changing standard defaults are available. For example: + +CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local + +This command specifies that the C compiler should be run with the flags '-O2 +-Wall' instead of the default, and that "make install" should install PCRE2 +under /opt/local instead of the default /usr/local. + +If you want to build in a different directory, just run "configure" with that +directory as current. 
For example, suppose you have unpacked the PCRE2 source +into /source/pcre2/pcre2-xxx, but you want to build it in +/build/pcre2/pcre2-xxx: + +cd /build/pcre2/pcre2-xxx +/source/pcre2/pcre2-xxx/configure + +PCRE2 is written in C and is normally compiled as a C library. However, it is +possible to build it as a C++ library, though the provided building apparatus +does not have any features to support this. + +There are some optional features that can be included or omitted from the PCRE2 +library. They are also documented in the pcre2build man page. + +. By default, both shared and static libraries are built. You can change this + by adding one of these options to the "configure" command: + + --disable-shared + --disable-static + + Setting --disable-shared ensures that PCRE2 libraries are built as static + libraries. The binaries that are then created as part of the build process + (for example, pcre2test and pcre2grep) are linked statically with one or more + PCRE2 libraries, but may also be dynamically linked with other libraries such + as libc. If you want these binaries to be fully statically linked, you can + set LDFLAGS like this: + + LDFLAGS=--static ./configure --disable-shared + + Note the two hyphens in --static. Of course, this works only if static + versions of all the relevant libraries are available for linking. See also + "Shared libraries" below. + +. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to + the "configure" command, the 16-bit library is also built. If you add + --enable-pcre2-32 to the "configure" command, the 32-bit library is also + built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 + to disable building the 8-bit library. + +. If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. This support is available only for certain hardware + architectures. 
If you try to enable it on an unsupported architecture, there + will be a compile time error. If in doubt, use --enable-jit=auto, which + enables JIT only if the current hardware is supported. + +. If you are enabling JIT under an SELinux environment you may also want to add + --enable-jit-sealloc, which enables the use of an executable memory allocator + that is compatible with SELinux. Warning: this allocator is experimental! + It does not support fork() operation and may crash when no disk space is + available. This option has no effect if JIT is disabled. + +. If you do not want to make use of the default support for UTF-8 Unicode + character strings in the 8-bit library, UTF-16 Unicode character strings in + the 16-bit library, or UTF-32 Unicode character strings in the 32-bit + library, you can add --disable-unicode to the "configure" command. This + reduces the size of the libraries. It is not possible to configure one + library with Unicode support, and another without, in the same configuration. + It is also not possible to use --enable-ebcdic (see below) with Unicode + support, so if this option is set, you must also use --disable-unicode. + + When Unicode support is available, the use of a UTF encoding still has to be + enabled by setting the PCRE2_UTF option at run time or starting a pattern + with (*UTF). When PCRE2 is compiled with Unicode support, its input can only + either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. + + As well as supporting UTF strings, Unicode support includes support for the + \P, \p, and \X sequences that recognize Unicode character properties. + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). + +. 
You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any + of the preceding, or any of the Unicode newline sequences, or the NUL (zero) + character as indicating the end of a line. Whatever you specify at build time + is the default; the caller of PCRE2 can change the selection at run time. The + default newline indicator is a single LF character (the Unix standard). You + can specify the default newline indicator by adding --enable-newline-is-cr, + --enable-newline-is-lf, --enable-newline-is-crlf, + --enable-newline-is-anycrlf, --enable-newline-is-any, or + --enable-newline-is-nul to the "configure" command, respectively. + +. By default, the sequence \R in a pattern matches any Unicode line ending + sequence. This is independent of the option specifying what PCRE2 considers + to be the end of a line (see above). However, the caller of PCRE2 can + restrict \R to match only CR, LF, or CRLF. You can make this the default by + adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). + +. In a pattern, the escape sequence \C matches a single code unit, even in a + UTF mode. This can be dangerous because it breaks up multi-code-unit + characters. You can build PCRE2 with the use of \C permanently locked out by + adding --enable-never-backslash-C (note the upper case C) to the "configure" + command. When \C is allowed by the library, individual applications can lock + it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. + +. PCRE2 has a counter that limits the depth of nesting of parentheses in a + pattern. This limits the amount of system stack that a pattern uses when it + is compiled. The default is 250, but you can change it by setting, for + example, + + --with-parens-nest-limit=500 + +. PCRE2 has a counter that can be set to limit the amount of computing resource + it uses when matching a pattern. If the limit is exceeded during a match, the + match fails. The default is ten million. 
You can change the default by + setting, for example, + + --with-match-limit=500000 + + on the "configure" command. This is just the default; individual calls to + pcre2_match() or pcre2_dfa_match() can supply their own value. There is more + discussion in the pcre2api man page (search for pcre2_set_match_limit). + +. There is a separate counter that limits the depth of nested backtracking + (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a + matching process, which indirectly limits the amount of heap memory that is + used, and in the case of pcre2_dfa_match() the amount of stack as well. This + counter also has a default of ten million, which is essentially "unlimited". + You can change the default by setting, for example, + + --with-match-limit-depth=5000 + + There is more discussion in the pcre2api man page (search for + pcre2_set_depth_limit). + +. You can also set an explicit limit on the amount of heap memory used by + the pcre2_match() and pcre2_dfa_match() interpreters: + + --with-heap-limit=500 + + The units are kibibytes (units of 1024 bytes). This limit does not apply when + the JIT optimization (which has its own memory control features) is used. + There is more discussion on the pcre2api man page (search for + pcre2_set_heap_limit). + +. In the 8-bit library, the default maximum compiled pattern size is around + 64 kibibytes. You can increase this by adding --with-link-size=3 to the + "configure" command. PCRE2 then uses three bytes instead of two for offsets + to different parts of the compiled pattern. In the 16-bit library, + --with-link-size=3 is the same as --with-link-size=4, which (in both + libraries) uses four-byte offsets. Increasing the internal link size reduces + performance in the 8-bit and 16-bit libraries. In the 32-bit library, the + link size setting is ignored, as 4-byte offsets are always used. + +. 
Lookbehind assertions in which one or more branches can match a variable + number of characters are supported only if there is a maximum matching length + for each top-level branch. There is a limit to this maximum that defaults to + 255 characters. You can alter this default by a setting such as + + --with-max-varlookbehind=100 + + The limit can be changed at runtime by calling pcre2_set_max_varlookbehind(). + Lookbehind assertions in which every branch matches a fixed number of + characters (not necessarily all the same) are not constrained by this limit. + +. For speed, PCRE2 uses four tables for manipulating and identifying characters + whose code point values are less than 256. By default, it uses a set of + tables for ASCII encoding that is part of the distribution. If you specify + + --enable-rebuild-chartables + + a program called pcre2_dftables is compiled and run in the default C locale + when you obey "make". It builds a source file called pcre2_chartables.c. If + you do not specify this option, pcre2_chartables.c is created as a copy of + pcre2_chartables.c.dist. See "Character tables" below for further + information. + +. It is possible to compile PCRE2 for use on systems that use EBCDIC as their + character code (as opposed to ASCII/Unicode) by specifying + + --enable-ebcdic --disable-unicode + + This automatically implies --enable-rebuild-chartables (see above). However, + when PCRE2 is built this way, it always operates in EBCDIC. It cannot support + both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25, + which specifies that the code value for the EBCDIC NL character is 0x25 + instead of the default 0x15. + +. If you specify --enable-debug, additional debugging code is included in the + build. This option is intended for use by the PCRE2 maintainers. + +. In environments where valgrind is installed, if you specify + + --enable-valgrind + + PCRE2 will use valgrind annotations to mark certain memory regions as + unaddressable. 
This allows it to detect invalid memory accesses, and is + mostly useful for debugging PCRE2 itself. + +. In environments where the gcc compiler is used and lcov is installed, if you + specify + + --enable-coverage + + the build process implements a code coverage report for the test suite. The + report is generated by running "make coverage". If ccache is installed on + your system, it must be disabled when building PCRE2 for coverage reporting. + You can do this by setting the environment variable CCACHE_DISABLE=1 before + running "make" to build PCRE2. There is more information about coverage + reporting in the "pcre2build" documentation. + +. When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. There is support for calling external programs during matching in the + pcre2grep command, using PCRE2's callout facility with string arguments. This + support can be disabled by adding --disable-pcre2grep-callout to the + "configure" command. There are two kinds of callout: one that generates + output from inbuilt code, and another that calls an external program. The + latter has special support for Windows and VMS; otherwise it assumes the + existence of the fork() function. This facility can be disabled by adding + --disable-pcre2grep-callout-fork to the "configure" command. + +. The pcre2grep program currently supports only 8-bit data files, and so + requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use + libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by + specifying one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + Of course, the relevant libraries must be installed on your system. + +. The default starting size (in bytes) of the internal buffer used by pcre2grep + can be set by, for example: + + --with-pcre2grep-bufsize=51200 + + The value must be a plain integer. The default is 20480. 
The amount of memory + used by pcre2grep is actually three times this number, to allow for "before" + and "after" lines. If very long lines are encountered, the buffer is + automatically enlarged, up to a fixed maximum size. + +. The default maximum size of pcre2grep's internal buffer can be set by, for + example: + + --with-pcre2grep-max-bufsize=2097152 + + The default is either 1048576 or the value of --with-pcre2grep-bufsize, + whichever is the larger. + +. It is possible to compile pcre2test so that it links with the libreadline + or libedit libraries, by specifying, respectively, + + --enable-pcre2test-libreadline or --enable-pcre2test-libedit + + If this is done, when pcre2test's input is from a terminal, it reads it using + the readline() function. This provides line-editing and history facilities. + Note that libreadline is GPL-licensed, so if you distribute a binary of + pcre2test linked in this way, there may be licensing issues. These can be + avoided by linking with libedit (which has a BSD licence) instead. + + Enabling libreadline causes the -lreadline option to be added to the + pcre2test build. In many operating environments with a system-installed + readline library this is sufficient. However, in some environments (e.g. if + an unmodified distribution version of readline is in use), it may be + necessary to specify something like LIBS="-lncurses" as well. This is + because, to quote the readline INSTALL, "Readline uses the termcap functions, + but does not link with the termcap or curses library itself, allowing + applications which link with readline the option to choose an appropriate + library." If you get error messages about missing functions tgetstr, tgetent, + tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses + library should fix it. + +. The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. 
By default, PCRE2 uses these modifiers in + environments other than Microsoft Visual Studio versions earlier than 2013 + when __STDC_VERSION__ is defined and has a value greater than or equal to + 199901L (indicating C99). However, there is at least one environment that + claims to be C99 but does not support these modifiers. If + --disable-percent-zt is specified, no use is made of the z or t modifiers. + Instead of %td or %zu, %lu is used, with a cast for size_t values. + +. There is a special option called --enable-fuzz-support for use by people who + want to run fuzzing tests on PCRE2. If set, it causes an extra library + called libpcre2-fuzzsupport.a to be built, but not installed. This contains + a single function called LLVMFuzzerTestOneInput() whose arguments are a + pointer to a string and the length of the string. When called, this function + tries to compile the string as a pattern, and if that succeeds, to match + it. This is done both with no options and with some random options bits that + are generated from the string. Setting --enable-fuzz-support also causes an + executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally + run under valgrind or used when PCRE2 is compiled with address sanitizing + enabled. It calls the fuzzing function and outputs information about what it + is doing. The input strings are specified by arguments: if an argument + starts with "=" the rest of it is a literal input string. Otherwise, it is + assumed to be a file name, and the contents of the file are the test string. + +. Releases before 10.30 could be compiled with --disable-stack-for-recursion, + which caused pcre2_match() to use individual blocks on the heap for + backtracking instead of recursive function calls (which use the stack). This + is now obsolete because pcre2_match() was refactored always to use the heap + (in a much more efficient way than before). 
This option is retained for + backwards compatibility, but has no effect other than to output a warning. + +The "configure" script builds the following files for the basic C library: + +. Makefile the makefile that builds the library +. src/config.h build-time configuration options for the library +. src/pcre2.h the public PCRE2 header file +. pcre2-config script that shows the building settings such as CFLAGS + that were set for "configure" +. libpcre2-8.pc ) +. libpcre2-16.pc ) data for the pkg-config command +. libpcre2-32.pc ) +. libpcre2-posix.pc ) +. libtool script that builds shared and/or static libraries + +Versions of config.h and pcre2.h are distributed in the src directory of PCRE2 +tarballs under the names config.h.generic and pcre2.h.generic. These are +provided for those who have to build PCRE2 without using "configure" or CMake. +If you use "configure" or CMake, the .generic versions are not used. + +The "configure" script also creates config.status, which is an executable +script that can be run to recreate the configuration, and config.log, which +contains compiler output from tests that "configure" runs. + +Once "configure" has run, you can run "make". This builds whichever of the +libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test +program called pcre2test. If you enabled JIT support with --enable-jit, another +test program called pcre2_jit_test is built as well. If the 8-bit library is +built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also +built. Running "make" with the -j option may speed up compilation on +multiprocessor systems. + +The command "make check" runs all the appropriate tests. Details of the PCRE2 +tests are given below in a separate section of this document. The -j option of +"make" can also be used when running the tests. + +You can use "make install" to install PCRE2 into live directories on your +system. 
The following are installed (file names are all relative to the +<prefix> that is set when "configure" is run): + + Commands (bin): + pcre2test + pcre2grep (if 8-bit support is enabled) + pcre2-config + + Libraries (lib): + libpcre2-8 (if 8-bit support is enabled) + libpcre2-16 (if 16-bit support is enabled) + libpcre2-32 (if 32-bit support is enabled) + libpcre2-posix (if 8-bit support is enabled) + + Configuration information (lib/pkgconfig): + libpcre2-8.pc + libpcre2-16.pc + libpcre2-32.pc + libpcre2-posix.pc + + Header files (include): + pcre2.h + pcre2posix.h + + Man pages (share/man/man{1,3}): + pcre2grep.1 + pcre2test.1 + pcre2-config.1 + pcre2.3 + pcre2*.3 (lots more pages, all starting "pcre2") + + HTML documentation (share/doc/pcre2/html): + index.html + *.html (lots more pages, hyperlinked from index.html) + + Text file documentation (share/doc/pcre2): + AUTHORS + COPYING + ChangeLog + LICENCE + NEWS + README + SECURITY + pcre2.txt (a concatenation of the man(3) pages) + pcre2test.txt the pcre2test man page + pcre2grep.txt the pcre2grep man page + pcre2-config.txt the pcre2-config man page + +If you want to remove PCRE2 from your system, you can run "make uninstall". +This removes all the files that "make install" installed. However, it does not +remove any directories, because these are often shared with other programs. + + +Retrieving configuration information +------------------------------------ + +Running "make install" installs the command pcre2-config, which can be used to +recall information about the PCRE2 configuration and installation. For example: + + pcre2-config --version + +prints the version number, and + + pcre2-config --libs8 + +outputs information about where the 8-bit library is installed. This command +can be included in makefiles for programs that use PCRE2, saving the programmer +from having to remember too many details. Run pcre2-config with no arguments to +obtain a list of possible arguments.
+ +The pkg-config command is another system for saving and retrieving information +about installed libraries. Instead of separate commands for each library, a +single command is used. For example: + + pkg-config --libs libpcre2-16 + +The data is held in *.pc files that are installed in a directory called +<prefix>/lib/pkgconfig. + + +Shared libraries +---------------- + +The default distribution builds PCRE2 as shared libraries and static libraries, +as long as the operating system supports shared libraries. Shared library +support relies on the "libtool" script which is built as part of the +"configure" process. + +The libtool script is used to compile and link both shared and static +libraries. They are placed in a subdirectory called .libs when they are newly +built. The programs pcre2test and pcre2grep are built to use these uninstalled +libraries (by means of wrapper scripts in the case of shared libraries). When +you use "make install" to install shared libraries, pcre2grep and pcre2test are +automatically re-built to use the newly installed shared libraries before being +installed themselves. However, the versions left in the build directory still +use the uninstalled libraries. + +To build PCRE2 using static libraries only you must use --disable-shared when +configuring it. For example: + +./configure --prefix=/usr/gnu --disable-shared + +Then run "make" in the usual way. Similarly, you can use --disable-static to +build only shared libraries. Note, however, that when you build only static +libraries, binary programs such as pcre2test and pcre2grep may still be +dynamically linked with other libraries (for example, libc) unless you set +LDFLAGS to --static when running "configure". + + +Cross-compiling using autotools +------------------------------- + +You can specify CC and CFLAGS in the normal way to the "configure" command, in +order to cross-compile PCRE2 for some other host.
However, you should NOT +specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c +source file is compiled and run on the local host, in order to generate the +inbuilt character tables (the pcre2_chartables.c file). This will probably not +work, because pcre2_dftables.c needs to be compiled with the local compiler, +not the cross compiler. + +When --enable-rebuild-chartables is not specified, pcre2_chartables.c is +created by making a copy of pcre2_chartables.c.dist, which is a default set of +tables that assumes ASCII code. Cross-compiling with the default tables should +not be a problem. + +If you need to modify the character tables when cross-compiling, you should +move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by +hand and run it on the local host to make a new version of +pcre2_chartables.c.dist. See the pcre2build section "Creating character tables +at build time" for more details. + + +Making new tarballs +------------------- + +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. + +If you have modified any of the man page sources in the doc directory, you +should first run the maint/PrepareRelease script before making a distribution. +This script creates the .txt and HTML forms of the documentation from the man +pages. + + +Testing PCRE2 +------------- + +To test the basic PCRE2 library on a Unix-like system, run the RunTest script. +There is another script called RunGrepTest that tests the pcre2grep command. +When the 8-bit library is built, a test program for the POSIX wrapper, called +pcre2posix_test, is compiled, and when JIT support is enabled, a test program +called pcre2_jit_test is built. The scripts and the program tests are all run +when you obey "make check". For other environments, see the instructions in +NON-AUTOTOOLS-BUILD. 
+ +The RunTest script runs the pcre2test test program (which is documented in its +own man page) on each of the relevant testinput files in the testdata +directory, and compares the output with the contents of the corresponding +testoutput files. RunTest uses a file called testtry to hold the main output +from pcre2test. Other files whose names begin with "test" are used as working +files in some tests. + +Some tests are relevant only when certain build-time options were selected. For +example, the tests for UTF-8/16/32 features are run only when Unicode support +is available. RunTest outputs a comment when it skips a test. + +Many (but not all) of the tests that are not skipped are run twice if JIT +support is available. On the second run, JIT compilation is forced. This +testing can be suppressed by putting "-nojit" on the RunTest command line. + +The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit +libraries that are enabled. If you want to run just one set of tests, call +RunTest with either the -8, -16 or -32 option. + +If valgrind is installed, you can run the tests under it by putting "-valgrind" +on the RunTest command line. To run pcre2test on just one or more specific test +files, give their numbers as arguments to RunTest, for example: + + RunTest 2 7 11 + +You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the +end), or a number preceded by ~ to exclude a test. For example: + + RunTest 3-15 ~10 + +This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests +except test 13. Whatever order the arguments are in, the tests are always run +in numerical order. + +You can also call RunTest with the single argument "list" to cause it to output +a list of tests. + +The test sequence starts with "test 0", which is a special test that has no +input file, and whose output is not checked. This is because it will be +different on different hardware and with different configurations.
The test +exists in order to exercise some of pcre2test's code that would not otherwise +be run. + +Tests 1 and 2 can always be run, as they expect only plain text strings (not +UTF) and make no use of Unicode properties. The first test file can be fed +directly into the perltest.sh script to check that Perl gives the same results. +The only difference you should see is in the first few lines, where the Perl +version is given instead of the PCRE2 version. The second set of tests check +auxiliary functions, error detection, and run-time flags that are specific to +PCRE2. It also uses the debugging flags to check some of the internals of +pcre2_compile(). + +If you build PCRE2 with a locale setting that is not the standard C locale, the +character tables may be different (see next paragraph). In some cases, this may +cause failures in the second set of tests. For example, in a locale where the +isprint() function yields TRUE for characters in the range 128-255, the use of +[:isascii:] inside a character class defines a different set of characters, and +this shows up in this test as a difference in the compiled code, which is being +listed for checking. For example, where the comparison test output contains +[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other +cases. This is not a bug in PCRE2. + +Test 3 checks pcre2_maketables(), the facility for building a set of character +tables for a specific locale and using them instead of the default tables. The +script uses the "locale" command to check for the availability of the "fr_FR", +"french", or "fr" locale, and uses the first one that it finds. If the "locale" +command fails, or if its output doesn't include "fr_FR", "french", or "fr" in +the list of available locales, the third test cannot be run, and a comment is +output to say why. 
If running this test produces an error like this: + + ** Failed to set locale "fr_FR" + +it means that the given locale is not available on your system, despite being +listed by "locale". This does not mean that PCRE2 is broken. There are three +alternative output files for the third test, because three different versions +of the French locale have been encountered. The test passes if its output +matches any one of them. + +Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible +with the perltest.sh script, and test 5 checking PCRE2-specific things. + +Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in +non-UTF mode and UTF-mode with Unicode property support, respectively. + +Test 8 checks some internal offsets and code size features, but it is run only +when Unicode support is enabled. The output is different in 8-bit, 16-bit, and +32-bit modes and for different link sizes, so there are different output files +for each mode and link size. + +Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in +16-bit and 32-bit modes. These are tests that generate different output in +8-bit mode. Each pair are for general cases and Unicode support, respectively. + +Test 13 checks the handling of non-UTF characters greater than 255 by +pcre2_dfa_match() in 16-bit and 32-bit modes. + +Test 14 contains some special UTF and UCP tests that give different output for +different code unit widths. + +Test 15 contains a number of tests that must not be run with JIT. They check, +among other non-JIT things, the match-limiting features of the interpretive +matcher. + +Test 16 is run only when JIT support is not available. It checks that an +attempt to use JIT has the expected behaviour. + +Test 17 is run only when JIT support is available. It checks JIT complete and +partial modes, match-limiting under JIT, and other JIT-specific features. + +Tests 18 and 19 are run only in 8-bit mode. 
They check the POSIX interface to +the 8-bit library, without and with Unicode support, respectively. + +Test 20 checks the serialization functions by writing a set of compiled +patterns to a file, and then reloading and checking them. + +Tests 21 and 22 test \C support when the use of \C is not locked out, without +and with UTF support, respectively. Test 23 tests \C when it is locked out. + +Tests 24 and 25 test the experimental pattern conversion functions, without and +with UTF support, respectively. + +Test 26 checks Unicode property support using tests that are generated +automatically from the Unicode data tables. + + +Character tables +---------------- + +For speed, PCRE2 uses four tables for manipulating and identifying characters +whose code point values are less than 256. By default, a set of tables that is +built into the library is used. The pcre2_maketables() function can be called +by an application to create a new set of tables in the current locale. These are +passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a +compile context. + +The source file called pcre2_chartables.c contains the default set of tables. +By default, this is created as a copy of pcre2_chartables.c.dist, which +contains tables for ASCII coding. However, if --enable-rebuild-chartables is +specified for ./configure, a new version of pcre2_chartables.c is built by the +program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C +character handling functions such as isalnum(), isalpha(), isupper(), +islower(), etc. to build the table sources. This means that the default C +locale that is set for your system will control the contents of these default +tables. You can change the default tables by editing pcre2_chartables.c and +then re-building PCRE2. If you do this, you should take care to ensure that the +file does not get automatically re-generated.
The best way to do this is to +move pcre2_chartables.c.dist out of the way and replace it with your customized +tables. + +When the pcre2_dftables program is run as a result of specifying +--enable-rebuild-chartables, it uses the default C locale that is set on your +system. It does not pay attention to the LC_xxx environment variables. In other +words, it uses the system's default locale rather than whatever the compiling +user happens to have set. If you really do want to build a source set of +character tables in a locale that is specified by the LC_xxx variables, you can +run the pcre2_dftables program by hand with the -L option. For example: + + ./pcre2_dftables -L pcre2_chartables.c.special + +The second argument names the file where the source code for the tables is +written. The first two 256-byte tables provide lower casing and case flipping +functions, respectively. The next table consists of a number of 32-byte bit +maps which identify certain character classes such as digits, "word" +characters, white space, etc. These are used when building 32-byte bit maps +that represent character classes for code points less than 256. The final +256-byte table has bits indicating various character types, as follows: + + 1 white space character + 2 letter + 4 lower case letter + 8 decimal digit + 16 alphanumeric or '_' + +You can also specify -b (with or without -L) when running pcre2_dftables. This +causes the tables to be written in binary instead of as source code. A set of +binary tables can be loaded into memory by an application and passed to +pcre2_compile() in the same way as tables created dynamically by calling +pcre2_maketables(). The tables are just a string of bytes, independent of +hardware characteristics such as endianness. This means they can be bundled +with an application that runs in different environments, to ensure consistent +behaviour. + +See also the pcre2build section "Creating character tables at build time". 
+ + +File manifest +------------- + +The distribution should contain the files listed below. + +(A) Source files for the PCRE2 library functions and their headers are found in + the src directory: + + src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c + when --enable-rebuild-chartables is specified + + src/pcre2_chartables.c.dist a default set of character tables that assume + ASCII coding; unless --enable-rebuild-chartables is + specified, used by copying to pcre2_chartables.c + + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_chkdint.c ) + src/pcre2_compile.c ) + src/pcre2_compile_class.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + src/pcre2_string_utils.c ) + src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_ucptables.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) + + src/pcre2_printint.c debugging function that is used by pcre2test, + src/pcre2_fuzzsupport.c function for (optional) fuzzing support + + src/config.h.in template for config.h, when built by "configure" + src/pcre2.h.in template for pcre2.h when built by "configure" + src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_compile.h header for internal use + src/pcre2_internal.h header for internal use + src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_jit_char_inc.h header used by JIT + src/pcre2_jit_neon_inc.h header used by JIT + src/pcre2_jit_simd_inc.h 
header used by JIT + src/pcre2_ucp.h header for Unicode property handling + src/pcre2_util.h header for internal utils + + deps/sljit/sljit_src/* source files for the JIT compiler + +(B) Source files for programs that use PCRE2: + + src/pcre2demo.c simple demonstration of coding calls to PCRE2 + src/pcre2grep.c source of a grep utility that uses PCRE2 + src/pcre2test.c comprehensive test program + src/pcre2_jit_test.c JIT test program + src/pcre2posix_test.c POSIX wrapper API test program + +(C) Auxiliary files: + + AUTHORS.md information about the authors of PCRE2 + ChangeLog log of changes to the code + HACKING some notes about the internals of PCRE2 + INSTALL generic installation instructions + LICENCE.md conditions for the use of PCRE2 + COPYING the same, using GNU's standard name + SECURITY.md information on reporting vulnerabilities + Makefile.in ) template for Unix Makefile, which is built by + ) "configure" + Makefile.am ) the automake input that was used to create + ) Makefile.in + NEWS important changes in this release + NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools + README this file + RunTest a Unix shell script for running tests + RunGrepTest a Unix shell script for pcre2grep tests + RunTest.bat a Windows batch file for running tests + RunGrepTest.bat a Windows batch file for pcre2grep tests + aclocal.m4 m4 macros (generated by "aclocal") + m4/* m4 macros (used by autoconf) + configure a configuring shell script (built by autoconf) + configure.ac ) the autoconf input that was used to build + ) "configure" and config.h + doc/*.3 man page sources for PCRE2 + doc/*.1 man page sources for pcre2grep and pcre2test + doc/html/* HTML documentation + doc/pcre2.txt plain text version of the man pages + doc/pcre2-config.txt plain text documentation of pcre2-config script + doc/pcre2grep.txt plain text documentation of grep utility program + doc/pcre2test.txt plain text documentation of test program + libpcre2-8.pc.in template for 
libpcre2-8.pc for pkg-config + libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config + libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config + libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config + ar-lib ) + config.guess ) + config.sub ) + depcomp ) helper tools generated by libtool and + compile ) automake, used internally by ./configure + install-sh ) + ltmain.sh ) + missing ) + test-driver ) + perltest.sh Script for running a Perl test program + pcre2-config.in source of script which retains PCRE2 information + testdata/testinput* test data for main library tests + testdata/testoutput* expected test results + testdata/grep* input and output for pcre2grep tests + testdata/* other supporting test files + +(D) Auxiliary files for CMake support + + cmake/COPYING-CMAKE-SCRIPTS + cmake/FindEditline.cmake + cmake/FindReadline.cmake + cmake/pcre2-config-version.cmake.in + cmake/pcre2-config.cmake.in + CMakeLists.txt + config-cmake.h.in + +(E) Auxiliary files for building PCRE2 "by hand" + + src/pcre2.h.generic ) a version of the public PCRE2 header file + ) for use in non-"configure" environments + src/config.h.generic ) a version of config.h for use in non-"configure" + ) environments + +(F) Auxiliary files for building PCRE2 using other build systems + + BUILD.bazel ) + MODULE.bazel ) files used by the Bazel build system + WORKSPACE.bazel ) + build.zig file used by zig's build system + +(G) Auxiliary files for building PCRE2 under OpenVMS + + vms/configure.com ) + vms/openvms_readme.txt ) These files were contributed by a PCRE2 user. 
+ vms/pcre2.h_patch ) + vms/stdint.h ) + +============================== +Last updated: 18 December 2024 +============================== + diff --git a/3rd/pcre2/README.md b/3rd/pcre2/README.md new file mode 100644 index 00000000..d3fff179 --- /dev/null +++ b/3rd/pcre2/README.md @@ -0,0 +1,56 @@ +# PCRE2 - Perl-Compatible Regular Expressions + +The PCRE2 library is a set of C functions that implement regular expression +pattern matching using the same syntax and semantics as Perl 5. PCRE2 has its +own native API, as well as a set of wrapper functions that correspond to the +POSIX regular expression API. The PCRE2 library is free, even for building +proprietary software. It comes in three forms, for processing 8-bit, 16-bit, +or 32-bit code units, in either literal or UTF encoding. + +PCRE2 was first released in 2015 to replace the API in the original PCRE +library, which is now obsolete and no longer maintained. As well as a more +flexible API, the code of PCRE2 has been much improved since the fork. + +## Download + +As well as downloading from the +[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 +or the older, unmaintained PCRE1 library from an +[*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge. + +You can check out the PCRE2 source code via Git or Subversion: + + git clone https://github.com/PCRE2Project/pcre2.git + svn co https://github.com/PCRE2Project/pcre2.git + +## Contributed Ports + +If you just need the command-line PCRE2 tools on Windows, precompiled binary +versions are available at this +[Rexegg page](http://www.rexegg.com/pcregrep-pcretest.html). + +A PCRE2 port for z/OS, a mainframe operating system which uses EBCDIC as its +default character encoding, can be found at +[http://www.cbttape.org](http://www.cbttape.org/) (File 939). + +## Documentation + +You can read the PCRE2 documentation +[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html). 
+ +Comparisons to Perl's regular expression semantics can be found in the +community authored Wikipedia entry for PCRE. + +There is a curated summary of changes for each PCRE release, copies of +documentation from older releases, and other useful information from the third +party authored +[RexEgg PCRE Documentation and Change Log page](http://www.rexegg.com/pcre-documentation.html). + +## Contact + +To report a problem with the PCRE2 library, or to make a feature request, please +use the PCRE2 GitHub issues tracker. There is a mailing list for discussion of + PCRE2 issues and development at pcre2-dev@googlegroups.com, which is where any +announcements will be made. You can browse the +[list archives](https://groups.google.com/g/pcre2-dev). + diff --git a/3rd/pcre2/RunGrepTest b/3rd/pcre2/RunGrepTest new file mode 100644 index 00000000..396884cc --- /dev/null +++ b/3rd/pcre2/RunGrepTest @@ -0,0 +1,1157 @@ +#! /bin/sh + +# Run pcre2grep tests. The assumption is that the PCRE2 tests check the library +# itself. What we are checking here is the file handling and options that are +# supported by pcre2grep. This script must be run in the build directory. + +# CODING CONVENTIONS: +# * Put printf arguments in single, not double quotes to avoid unwanted +# escaping. +# * Use \0 for binary zero in printf, not \x0, for the benefit of older +# versions (and use octal for other special values). + +# Set the C locale, so that sort(1) behaves predictably. + +LC_ALL=C +export LC_ALL + +# Remove any non-default colouring and aliases that the caller may have set. + +unset PCRE2GREP_COLOUR PCRE2GREP_COLOR PCREGREP_COLOUR PCREGREP_COLOR +unset GREP_COLOR GREP_COLORS +unset cp ls mv rm + +# Remember the current (build) directory, set the program to be tested, and +# valgrind settings when requested. + +builddir=`pwd` +: ${pcre2grep:=$builddir/pcre2grep} +: ${pcre2test:=$builddir/pcre2test} + +if [ ! -x $pcre2grep ] ; then + echo "** $pcre2grep does not exist or is not executable." 
+ exit 1 +fi + +if [ ! -x $pcre2test ] ; then + echo "** $pcre2test does not exist or is not executable." + exit 1 +fi + +valgrind= +while [ $# -gt 0 ] ; do + case $1 in + valgrind|-valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all-non-file --error-exitcode=70";; + *) echo "RunGrepTest: Unknown argument $1"; exit 1;; + esac + shift +done + +pcre2grep_version=`$pcre2grep -V` +if [ "$valgrind" = "" ] ; then + echo "Testing $pcre2grep_version" +else + echo "Testing $pcre2grep_version using valgrind" +fi + +# Set up a suitable "diff" command for comparison. Some systems have a diff +# that lacks a -u option. Try to deal with this; better do the test for the -b +# option as well. + +cf="diff" +diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b" +diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u" +diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub" + +# Add a -a (always treat as text) if available. This was added in an attempt +# to get more detail from an Alpine Linux test failure on GitHub. + +$cf -a /dev/null /dev/null 2>/dev/null && cf="$cf -a" + +# Some tests involve NUL characters. It seems impossible to handle them easily +# in many operating systems. An earlier version of this script used sed to +# translate NUL into the string ZERO, but this didn't work on Solaris (aka +# SunOS), where the version of sed explicitly doesn't like them, and also MacOS +# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine, +# even when using GNU sed. A user suggested using tr instead, which +# necessitates translating to a single character. However, on (some versions +# of?) Solaris, the normal "tr" cannot handle binary zeros, but if +# /usr/xpg4/bin/tr is available, it can do so, so test for that. + +if [ -x /usr/xpg4/bin/tr ] ; then + tr=/usr/xpg4/bin/tr +else + tr=tr +fi + +# If this test is being run from "make check", $srcdir will be set. 
If not, set +# it to the current or parent directory, whichever one contains the test data. +# Subsequently, we run most of the pcre2grep tests in the source directory so +# that the file names in the output are always the same. + +if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then + if [ -d "./testdata" ] ; then + srcdir=. + elif [ -d "../testdata" ] ; then + srcdir=.. + else + echo "Cannot find the testdata directory" + exit 1 + fi +fi + +# Set up the path to the valgrind JIT suppressions + +vjs= +if [ "$valgrind" != "" ] ; then + $pcre2test -C jit >/dev/null + if [ $? -ne 0 ]; then + vjs="--suppressions=`realpath "$srcdir"`/testdata/valgrind-jit.supp" + fi +fi + +# Check for the availability of UTF-8 support + +$pcre2test -C unicode >/dev/null +utf8=$? + +# Check default newline convention. If it does not include LF, force LF. + +nl=`$pcre2test -C newline` +if [ "$nl" != "LF" -a "$nl" != "ANY" -a "$nl" != "ANYCRLF" ]; then + pcre2grep="$pcre2grep -N LF" + echo "Default newline setting forced to LF" +fi + +# ------ Function to run and check a special pcre2grep arguments test ------- + +checkspecial() + { + $valgrind $pcre2grep $1 >>testtrygrep 2>&1 + if [ $? -ne $2 ] ; then + echo "** pcre2grep $1 failed - check testtrygrep" + exit 1 + fi + } + +# ------ Normal tests ------ + +echo "Testing pcre2grep main features" + +echo "---------------------------- Test 1 ------------------------------" >testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 2 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '^PATTERN' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 3 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -in PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 4 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -ic PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 5 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 6 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 7 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 8 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 9 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 10 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 11 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -vn pattern ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 12 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -ix pattern ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 13 -----------------------------" >>testtrygrep +echo seventeen >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f./testdata/greplist -f $builddir/testtemp1grep ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 14 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 15 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep 'abc^*' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 16 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep abc ./testdata/grepinput ./testdata/nonexistfile) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 17 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M 'the\noutput' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 18 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 19 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mix 'Pattern' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 20 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 21 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 22 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB3 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 23 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -C3 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 24 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A9 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 25 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB9 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 26 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A9 -B9 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 27 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A10 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 28 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nB10 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 29 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -C12 -B10 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 30 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 31 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 32 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 33 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 34 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -s 'fox' ./testdata/grepnonexist) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 35 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 36 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude='grepinput(Bad)?8' --exclude=grepinputM --exclude=grepinputUN --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 37 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '^(a+)*\d' ./testdata/grepinput) >>testtrygrep 2>teststderrgrep +echo "RC=$?" >>testtrygrep +echo "======== STDERR ========" >>testtrygrep +cat teststderrgrep >>testtrygrep + +echo "---------------------------- Test 38 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep '>\x00<' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 39 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A1 'before the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 40 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 'after the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 41 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 42 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 43 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on 'before|zero|after' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 44 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on -e before -ezero -e after ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 45 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 46 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -e 'unopened)' -e abc ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex=123 -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 47 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Fx "AB.VE +elephant" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 48 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -F "AB.VE +elephant" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 49 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -F -e DATA -e "AB.VE +elephant" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 50 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 51 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mv "brown\sfox" ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 52 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always jumps ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 53 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 54 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 55 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 56 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -c --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 57 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -c -l --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 58 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex=PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 59 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regexp=PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 60 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regex PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 61 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --regexp PATTERN ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 62 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $pcre2grep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 63 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1K --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 64 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 65 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 66 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 67 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 68 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 69 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -vn --colour=always pattern ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 70 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 71 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 72 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 73 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 74 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 75 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 76 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 77 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 78 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 79 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 80 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o "\b01|\b02" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 81 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 82 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 83 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 84 -----------------------------" >>testtrygrep +echo testdata/grepinput3 >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1grep "fox|complete|t7") >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 85 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 86 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 87 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 88 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -v "cat" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 89 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -I "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 90 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 91 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -a "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 92 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 93 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --text "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 94 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 95 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 96 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MCU] 'fox' ./test* | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 97 -----------------------------" >>testtrygrep +echo "grepinput$" >testtemp1grep +echo "grepinput8" >>testtemp1grep +echo "grepinputBad8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 98 -----------------------------" >>testtrygrep +echo "grepinput$" >testtemp1grep +echo "grepinput8" >>testtemp1grep +echo "grepinputBad8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 99 -----------------------------" >>testtrygrep +echo "grepinput$" >testtemp1grep +echo "grepinput8" >testtemp2grep +echo "grepinputBad8" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 100 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 101 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 102 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -n "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 103 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 104 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -n --only-matching "^$" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 105 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always "ipsum|" ./testdata/grepinput3) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 106 -----------------------------" >>testtrygrep +(cd $srcdir; echo "a" | $valgrind $vjs $pcre2grep -M "|a" ) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 107 -----------------------------" >>testtrygrep +echo "a" >testtemp1grep +echo "aaaaa" >>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets --allow-lookaround-bsk '(?<=\Ka)' $builddir/testtemp1grep) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 108 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -lq PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 109 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -cq --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 110 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 111 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 112 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 113 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --total-count --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 114 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tc --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 115 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tlc --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 116 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MCU] -th 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 117 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tch --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 118 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -tL --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 119 -----------------------------" >>testtrygrep +printf '123\n456\n789\n---abc\ndef\nxyz\n---\n' >testNinputgrep +$valgrind $vjs $pcre2grep -Mo '(\n|[^-])*---' testNinputgrep >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 120 ------------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '$&:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HO 'XX$' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -O '$x{12345678}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -O '$x{123Z' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --output '$x{1234}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 121 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -F '\E and (regex)' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 122 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w 'cat|dog' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 123 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -w 'dog|cat' testdata/grepinputv) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 124 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 125 -----------------------------" >>testtrygrep +printf 'abcd\n' >testNinputgrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K.)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=.\K)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K[ac])' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +GREP_COLORS='ms=1;20' $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 126 -----------------------------" >>testtrygrep +printf 'Next line pattern has binary zero\nABC\0XYZ\n' >testtemp1grep +printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep +$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep +echo "RC=$?" >>testtrygrep +printf 'Next line pattern is erroneous.\n^abc)(xy' >testtemp1grep +$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 127 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 128 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m1M -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 129 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m 2 'fox' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 130 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -o -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 131 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -oc -m2 'fox' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 132 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 133 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 134 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --max-count=1 -nH -O '=$x{41}$x423$o{103}$o1045=' 'fox' -) <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 135 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep +echo "RC=$?" 
>>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 136 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -m1MK -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --max-count=1MK -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 137 -----------------------------" >>testtrygrep +printf 'Last line\nhas no newline' >testtemp1grep +$valgrind $vjs $pcre2grep -A1 Last testtemp1grep >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 138 -----------------------------" >>testtrygrep +printf 'AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n' >testtemp1grep +$valgrind $vjs $pcre2grep --no-jit --heap-limit=0 b testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 139 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-buffered 'fox' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 140 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 -A1 'brown' testdata/grepinputv) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 141 -----------------------------" >>testtrygrep +printf "%s/testdata/grepinputv\n-\n" "$srcdir" >testtemp1grep +printf 'This is a line from stdin.' >testtemp2grep +$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1 # stdin feeds the "-" entry of the file list; append, never truncate +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 142 -----------------------------" >>testtrygrep +printf "/does/not/exist\n" >testtemp1grep +printf 'This is a line from stdin.' >testtemp2grep +$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 143 -----------------------------" >>testtrygrep +printf 'fox|cat' >testtemp1grep +$valgrind $vjs $pcre2grep -f - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1 # "-f -" takes the pattern list from stdin +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 144 -----------------------------" >>testtrygrep +$valgrind $vjs $pcre2grep -f /non/exist $srcdir/testdata/grepinputv >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 145 -----------------------------" >>testtrygrep +printf '*meta*\rdog.' >testtemp1grep +$valgrind $vjs $pcre2grep -Ncr -F -f testtemp1grep $srcdir/testdata/grepinputv >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 146 -----------------------------" >>testtrygrep +printf 'A123B' >testtemp1grep +$valgrind $vjs $pcre2grep -H -e '123|fox' - <testtemp1grep >>testtrygrep 2>&1 # file name "-" is stdin, supplied from testtemp1grep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -h -e '123|fox' - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 147 -----------------------------" >>testtrygrep +$valgrind $vjs $pcre2grep -e '123|fox' -- -nonfile >>testtrygrep 2>&1 +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 148 -----------------------------" >>testtrygrep +$valgrind $vjs $pcre2grep --nonexist >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -n-n-bad >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --context >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --only-matching --output=xx >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --colour=badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --newline=badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -d badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -D badvalue >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --buffer-size=0 >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --exclude '(badpat' abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --include-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 149 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=binary "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=wrong "dog" ./testdata/grepbinary) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +# This test runs the code that tests locale support. However, on some systems +# (e.g. Alpine Linux) there is no locale support and running this test just +# generates a "no match" result. 
Therefore, we test for locale support, and if +# it is found missing, we pretend that the test has run as expected so that the +# output matches. + +echo "---------------------------- Test 150 -----------------------------" >>testtrygrep +which locale >/dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "pcre2grep: Failed to set locale locale.bad (obtained from LC_CTYPE)" >>testtrygrep + echo "RC=2" >>testtrygrep +else + + (cd $srcdir; unset LC_ALL; LC_CTYPE=locale.bad $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1 + echo "RC=$?" >>testtrygrep +fi + +echo "---------------------------- Test 151 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 152 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 153 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 154 -----------------------------" >>testtrygrep +>testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 155 -----------------------------" >>testtrygrep +echo "" >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +echo "---------------------------- Test 156 -----------------------------" >>testtrygrep +echo "" >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file --file $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 157 -----------------------------" >>testtrygrep +echo "spaces " >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -o --posix-pattern-file --file=$builddir/testtemp1grep ./testdata/grepinputv >$builddir/testtemp2grep && $valgrind $vjs $pcre2grep -q "s " $builddir/testtemp2grep) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 158 -----------------------------" >>testtrygrep +echo "spaces." >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep -f $builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 159 -----------------------------" >>testtrygrep +printf "spaces.\r\n" >testtemp1grep +(cd $srcdir; $valgrind $vjs $pcre2grep --posix-pattern-file -f$builddir/testtemp1grep ./testdata/grepinputv) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 160 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -nC3 '^(ert|jkl)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep -n -B4 -A2 '^(ert|dfg)' ./testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + + +# Now compare the results. + +$cf $srcdir/testdata/grepoutput testtrygrep +if [ $? != 0 ] ; then exit 1; fi + + +# These tests require UTF-8 support + +if [ $utf8 -ne 0 ] ; then + echo "Testing pcre2grep UTF-8 features" + + echo "---------------------------- Test U1 ------------------------------" >testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" 
>>testtrygrep + + echo "---------------------------- Test U2 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U3 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any --allow-lookaround-bsk '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U4 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' ./testdata/grepinputBad8) >>testtrygrep 2>&1 + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U5 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -U -o '....' ./testdata/grepinputBad8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U6 -----------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U7 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -ui --colour=always 'k+|\babc\b' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U8 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -UiEP --colour=always 'k+|\babc\b' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "---------------------------- Test U9 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u --colour=always 'A\d' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" 
>>testtrygrep + + echo "---------------------------- Test U10 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -u --posix-digit --colour=always 'A\d' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + + $cf $srcdir/testdata/grepoutput8 testtrygrep + if [ $? != 0 ] ; then exit 1; fi + +else + echo "Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library" +fi + + +# We go to some contortions to try to ensure that the tests for the various +# newline settings will work in environments where the normal newline sequence +# is not \n. Do not use exported files, whose line endings might be changed. +# Instead, create an input file using printf so that its contents are exactly +# what we want. Note the messy fudge to get printf to write a string that +# starts with a hyphen. These tests are run in the build directory. + +echo "Testing pcre2grep newline settings" +printf 'abc\rdef\r\nghi\njkl' >testNinputgrep + +printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep +$valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n -N CR "^def" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep +pattern=`printf 'def\rjkl'` +$valgrind $vjs $pcre2grep -n --newline=cr -F "$pattern" testNinputgrep >>testtrygrep +echo "RC=$?" 
>>testtrygrep + +printf '%c--------------------------- Test N4 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep +echo "RC=$?" >>testtrygrep + +printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep +printf 'xyz\0abc\0def' >testNinputgrep +$valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep +$valgrind $vjs $pcre2grep -B1 -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep +echo "RC=$?" >>testtrygrep + +printf '%c--------------------------- Test N8 ------------------------------\r\n' - >>testtrygrep +$valgrind $vjs $pcre2grep -na --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "" >>testtrygrep + +$cf $srcdir/testdata/grepoutputN testtrygrep +if [ $? != 0 ] ; then exit 1; fi + + +# These newline tests need UTF support. 
+ +if [ $utf8 -ne 0 ] ; then + echo "Testing pcre2grep newline settings with UTF-8 features" + + printf '%c--------------------------- Test UN1 ------------------------------\r\n' - >testtrygrep + $valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" $srcdir/testdata/grepinputUN >>testtrygrep + echo "RC=$?" >>testtrygrep + + printf '%c--------------------------- Test UN2 ------------------------------\r\n' - >testtrygrep + $valgrind $vjs $pcre2grep -nauU --newline=anycrlf "^a" $srcdir/testdata/grepinputBad8_Trail >>testtrygrep + echo "RC=$?" >>testtrygrep + + echo "" >>testtrygrep + + $cf $srcdir/testdata/grepoutputUN testtrygrep + if [ $? != 0 ] ; then exit 1; fi +else + echo "Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library" +fi + + +# If pcre2grep supports script callouts, run some tests on them. It is possible +# to restrict these callouts to the non-fork case, either for security, or for +# environments that do not support fork(). This is handled by comparing to a +# different output. + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scripts in patterns are supported'; then + echo "Testing pcre2grep script callouts" + echo "--- Test 1 ---" >testtrygrep + $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 2 ---" >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 3 ---" >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 4 ---" >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"/bin/echo|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" 
>>testtrygrep + echo "--- Test 5 ---" >>testtrygrep + $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 6 ---" >>testtrygrep + $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + + if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then + nonfork=1 + $cf $srcdir/testdata/grepoutputCN testtrygrep + else + nonfork=0 + $cf $srcdir/testdata/grepoutputC testtrygrep + fi + if [ $? != 0 ] ; then exit 1; fi + + # These callout tests need UTF support. + + if [ $utf8 -ne 0 ] ; then + echo "Testing pcre2grep script callout with UTF-8 features" + echo "--- Test 1 ---" >testtrygrep + $valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + echo "--- Test 2 ---" >>testtrygrep + $valgrind $vjs $pcre2grep -u '(T)(?C"/bin/echo|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep + echo "RC=$?" >>testtrygrep + + if [ $nonfork = 1 ] ; then + $cf $srcdir/testdata/grepoutputCNU testtrygrep + else + $cf $srcdir/testdata/grepoutputCU testtrygrep + fi + if [ $? != 0 ] ; then exit 1; fi + else + echo "Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library" + fi + + unset nonfork +else + echo "Script callouts are not supported" +fi + + +# Test reading .gz and .bz2 files when supported. + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q '\.gz are read using zlib'; then + echo "Testing reading .gz file" + $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepinputC.gz >testtrygrep + echo "RC=$?" >>testtrygrep + $cf $srcdir/testdata/grepoutputCgz testtrygrep + if [ $? 
!= 0 ] ; then exit 1; fi +fi + +if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q '\.bz2 are read using bzlib2'; then + echo "Testing reading .bz2 file" + $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepinputC.bz2 >testtrygrep + echo "RC=$?" >>testtrygrep + $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepnot.bz2 >>testtrygrep + echo "RC=$?" >>testtrygrep + $cf $srcdir/testdata/grepoutputCbz2 testtrygrep + if [ $? != 0 ] ; then exit 1; fi +fi + + +# Finally, some tests to exercise code that is not tested above, just to be +# sure that it runs OK. Doing this improves the coverage statistics. The output +# is not checked. + +echo "Testing miscellaneous pcre2grep arguments (unchecked)" +echo '' >testtrygrep +checkspecial '-xxxxx' 2 +checkspecial '--help' 0 +checkspecial '--line-buffered --colour=auto abc /dev/null' 1 +checkspecial '--line-buffered --color abc /dev/null' 1 +checkspecial '-dskip abc .' 1 +checkspecial '-Dread -Dskip abc /dev/null' 1 + +# Clean up local working files +rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep + +exit 0 + +# End diff --git a/3rd/pcre2/RunGrepTest.bat b/3rd/pcre2/RunGrepTest.bat new file mode 100644 index 00000000..47111cf4 --- /dev/null +++ b/3rd/pcre2/RunGrepTest.bat @@ -0,0 +1,1109 @@ +@echo off + +:: Run pcre2grep tests. The assumption is that the PCRE2 tests check the library +:: itself. What we are checking here is the file handling and options that are +:: supported by pcre2grep. This script must be run in the build directory. +:: (jmh: I've only tested in the main directory, using my own builds.) + +setlocal enabledelayedexpansion + +:: Remove any non-default colouring that the caller may have set. + +set PCRE2GREP_COLOUR= +set PCRE2GREP_COLOR= +set PCREGREP_COLOUR= +set PCREGREP_COLOR= +set GREP_COLORS= +set GREP_COLOR= + +:: Remember the current (build) directory and set the program to be tested. 
+ +set builddir="%CD%" + +if [%pcre2grep%]==[] set pcre2grep=%builddir%\pcre2grep.exe +if [%pcre2test%]==[] set pcre2test=%builddir%\pcre2test.exe + +if NOT exist %pcre2grep% ( + echo ** %pcre2grep% does not exist. + exit /b 1 +) + +if NOT exist %pcre2test% ( + echo ** %pcre2test% does not exist. + exit /b 1 +) + +for /f "delims=" %%a in ('"%pcre2grep%" -V') do set pcre2grep_version=%%a +echo Testing %pcre2grep_version% + +:: Set up a suitable "diff" command for comparison. Some systems have a diff +:: that lacks a -u option. Try to deal with this; better do the test for the -b +:: option as well. Use FC if there's no diff, taking care to ignore equality. + +set cf= +set cfout= +diff -b nul nul 2>nul && set cf=diff -b +diff -u nul nul 2>nul && set cf=diff -u +diff -ub nul nul 2>nul && set cf=diff -ub +if NOT defined cf ( + set cf=fc /n + set "cfout=>testcf || (type testcf & cmd /c exit /b 1)" +) + +:: Set srcdir to the current or parent directory, whichever one contains the +:: test data. Subsequently, we run most of the pcre2grep tests in the source +:: directory so that the file names in the output are always the same. + +if NOT defined srcdir set srcdir=. +if NOT exist %srcdir%\testdata\ ( + if exist testdata\ ( + set srcdir=. + ) else if exist ..\testdata\ ( + set srcdir=.. + ) else if exist ..\..\testdata\ ( + set srcdir=..\.. + ) else ( + echo Cannot find the testdata directory + exit /b 1 + ) +) + +:: Check for the availability of UTF-8 support + +%pcre2test% -C unicode >nul +set utf8=%ERRORLEVEL% + +:: Check default newline convention. If it does not include LF, force LF. + +for /f %%a in ('"%pcre2test%" -C newline') do set nl=%%a +if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" ( + set pcre2grep=%pcre2grep% -N LF + echo Default newline setting forced to LF +) + +:: Create a simple printf via cscript/JScript (an actual printf may translate +:: LF to CRLF, which this one does not). 
We only support the barebones we need: +:: \r, \n, \0, and %s (but only once). + +echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n").replace(/\\0/g, "\x00").replace(/%%s/g, function() { return WScript.Arguments(1) })) >printf.js +set printf=cscript //nologo printf.js + +:: Create a simple 'tr' via cscript/JScript. +echo WScript.StdOut.Write(WScript.StdIn.ReadAll().replace(/\x00/g, "@")) >trnull.js +set trnull=cscript //nologo trnull.js + +:: ------ Normal tests ------ + +echo Testing pcre2grep main features + +echo ---------------------------- Test 1 ------------------------------>testtrygrep +(pushd %srcdir% & %pcre2grep% PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 2 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% "^PATTERN" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 3 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 4 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -ic PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 5 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 6 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -inh PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 7 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -il PATTERN 
./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 8 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -l PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 9 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -q PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 10 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 11 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -vn pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 12 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -ix pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 13 ----------------------------->>testtrygrep +echo seventeen >testtemp1grep +(pushd %srcdir% & %pcre2grep% -f./testdata/greplist -f %builddir%\testtemp1grep ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 14 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w pat ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 15 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "abc^*" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 16 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% abc ./testdata/grepinput ./testdata/nonexistfile & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 17 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -M "the\noutput" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 18 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn "(the\noutput|dog\.\n--)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 19 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mix "Pattern" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 20 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mixn "complete pair\nof lines" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 21 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 22 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 23 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -C3 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 24 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 25 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 26 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A9 -B9 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 27 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -A10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 28 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nB10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 29 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -C12 -B10 "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 30 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -inB3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 31 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -inA3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 32 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L "fox" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 33 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 34 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -s "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 35 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 36 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include="grepinput[^C]" --exclude "grepinput$" --exclude="grepinput(Bad)?8" --exclude=grepinputM --exclude=grepinputUN --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 37 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "^(a+)*\d" ./testdata/grepinput & popd) >>testtrygrep 2>teststderrgrep +echo RC=^%ERRORLEVEL%>>testtrygrep +echo ======== STDERR ========>>testtrygrep +type teststderrgrep >>testtrygrep + +echo ---------------------------- Test 38 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% ">\x00<" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 39 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -A1 "before the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 40 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 "after the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 41 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 -o "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo 
---------------------------- Test 42 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -B1 -onH "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 43 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 44 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on -e before -ezero -e after ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 45 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -on -f ./testdata/greplist -e binary ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 46 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -e "unopened)" -e abc ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex=123 -eabc -e xyz -e "[unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 47 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Fx AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 48 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -F AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo 
RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 49 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -F -e DATA -e AB.VE^ + +elephant ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 50 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% "^(abc|def|ghi|jkl)" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 51 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mv "brown\sfox" ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 52 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always jumps ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 53 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 54 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 55 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -f./testdata/greplist --color=always ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 56 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -c --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 57 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -c -l --exclude=grepinputC 
lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 58 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex=PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 59 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regexp=PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 60 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regex PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 61 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --regexp PATTERN ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 62 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --match-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 63 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --recursion-limit=1000 --no-jit -M "This is a file(.|\R)*file." 
./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 64 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o1 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 65 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 66 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o3 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 67 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o12 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 68 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% --only-matching=2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 69 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -vn --colour=always pattern ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 70 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep 
+(pushd %srcdir% & %pcre2grep% -M -n "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 71 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 72 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 73 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 74 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 75 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 76 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 77 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 78 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 79 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 80 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 81 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --color=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 82 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --colour=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 83 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 84 ----------------------------->>testtrygrep +echo testdata/grepinput3 >testtemp1grep +(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --file-list %builddir%\testtemp1grep "fox|complete|t7" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 85 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 86 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 87 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% "cat" 
./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 88 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -v "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 89 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -I "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 90 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=without-match "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 91 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -a "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 92 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 93 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 94 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 "fox" ./testdata/grepinput* | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 95 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 96 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" --exclude=grepinput[MCU] "fox" ./test* | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 97 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>>testtemp1grep +echo grepinputBad8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 98 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>>testtemp1grep +echo grepinputBad8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --exclude=grepinput[MCU] --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 99 ----------------------------->>testtrygrep +echo grepinput$>testtemp1grep +echo grepinput8>testtemp2grep +echo grepinputBad8>>testtemp1grep +(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude=grepinput[MCU] --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 100 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Ho2 --only-matching=1 -o3 "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 101 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator="|" "(\w+) binary (\w+)(\.)?" 
./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 102 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -n "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 103 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 104 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -n --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 105 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always "ipsum|" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 106 ----------------------------->>testtrygrep +(pushd %srcdir% & echo a| %pcre2grep% -M "|a" & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 107 ----------------------------->>testtrygrep +echo a>testtemp1grep +echo aaaaa>>testtemp1grep +(pushd %srcdir% & %pcre2grep% --line-offsets --allow-lookaround-bsk "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 108 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -lq PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 109 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -cq --exclude=grepinputC lazy ./testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 110 
----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --om-separator / -Mo0 -o1 -o2 "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 111 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 112 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --file-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 113 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --total-count --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 114 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 115 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tlc --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 116 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --exclude=grepinput[MCU] -th "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 117 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tch --exclude=grepinputC "the" testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 118 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -tL --exclude=grepinputC "the" 
testdata/grepinput* & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 119 ----------------------------->>testtrygrep +%printf% "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep +%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 120 ------------------------------>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "$&:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -m 1 -O "$0:$a$b$e$f$r$t$v" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "${X}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -HO "XX$" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -O "$x{12345678}" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -O "$x{123Z" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --output "$x{1234}" "(\w+) binary (\w+)(\.)?" 
./testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 121 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -F "\E and (regex)" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 122 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w "cat|dog" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 123 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -w "dog|cat" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 124 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn --colour=always "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn --colour=always -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -Mn -A2 "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 125 ----------------------------->>testtrygrep +%printf% "abcd\n" >testNinputgrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K.)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?=.\K)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?<=\K[ac])" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep +echo 
RC=^%ERRORLEVEL%>>testtrygrep +set GREP_COLORS=ms=1;20 +%pcre2grep% --colour=always --allow-lookaround-bsk "(?=[ac]\K)" testNinputgrep >>testtrygrep +set GREP_COLORS= +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 126 ----------------------------->>testtrygrep +%printf% "Next line pattern has binary zero\nABC\0XYZ\n" >testtemp1grep +%printf% "ABC\0XYZ\nABCDEF\nDEFABC\n" >testtemp2grep +%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%printf% "Next line pattern is erroneous.\n^abc)(xy" >testtemp1grep +%pcre2grep% -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 127 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 128 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -m1M -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 129 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -m 2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 130 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -o -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 131 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -oc -m2 "fox" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 132 ----------------------------->>testtrygrep +:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel +:: 
level, are not even numbered). Use a subshell instead. +:: The subshell reads grepinput on stdin, so both commands consume the same stream. +(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& head -1) <testdata\grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 133 ----------------------------->>testtrygrep +:: The Unix tests use fd3 here, but Windows only has StdIn/StdOut/StdErr (which, at the kernel +:: level, are not even numbered). Use a subshell instead. +:: The subshell reads grepinput on stdin, so both commands consume the same stream. +(pushd %srcdir% & (%pcre2grep% -m1 -A3 "^match" & echo ---& %pcre2grep% -m1 -A3 "^match") <testdata\grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 134 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --max-count=1 -nH -O "=$x{41}$x423$o{103}$o1045=" "fox" - & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 135 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -lZ "word" ./testdata/grepinputv ./testdata/grepinputv & popd) | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -A 1 -B 1 -HZ "word" ./testdata/grepinputv & popd) | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -MHZn "start[\s]+end" testdata/grepinputM & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 136 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -m1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --max-count=1MK -o1 --om-capture=0 "pattern()()()()" testdata/grepinput & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 137 ----------------------------->>testtrygrep +%printf% 
"Last line\nhas no newline" >testtemp1grep +%pcre2grep% -A1 Last testtemp1grep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 138 ----------------------------->>testtrygrep +%printf% "AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n" >testtemp1grep +%pcre2grep% --no-jit --heap-limit=0 b testtemp1grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 139 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --line-buffered "fox" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 140 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --buffer-size=10 -A1 "brown" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 141 ----------------------------->>testtrygrep +:: The file list names grepinputv and "-"; the "-" entry is served from testtemp2grep on stdin. +%printf% "%%s\testdata\grepinputv\n-\n" "%srcdir%" >testtemp1grep +%printf% "This is a line from stdin." >testtemp2grep +%pcre2grep% --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 142 ----------------------------->>testtrygrep +%printf% "/does/not/exist\n" >testtemp1grep +%printf% "This is a line from stdin." 
>testtemp2grep +%pcre2grep% --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 143 ----------------------------->>testtrygrep +:: The patterns for "-f -" are supplied on stdin from testtemp1grep. +%printf% "fox|cat" >testtemp1grep +%pcre2grep% -f - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 144 ----------------------------->>testtrygrep +%pcre2grep% -f /non/exist %srcdir%\testdata\grepinputv >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 145 ----------------------------->>testtrygrep +%printf% "*meta*\rdog." >testtemp1grep +%pcre2grep% -Ncr -F -f testtemp1grep %srcdir%\testdata\grepinputv >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 146 ----------------------------->>testtrygrep +:: Each command below is given testtemp1grep as its stdin. +%printf% "A123B" >testtemp1grep +%pcre2grep% -H -e "123|fox" - <testtemp1grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -h -e "123|fox" - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% - %srcdir%\testdata\grepinputv <testtemp1grep >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 147 ----------------------------->>testtrygrep +%pcre2grep% -e "123|fox" -- -nonfile >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 148 ----------------------------->>testtrygrep +%pcre2grep% --nonexist >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -n-n-bad >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --context >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --only-matching --output=xx >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --colour=badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --newline=badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep 
+%pcre2grep% -d badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -D badvalue >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --buffer-size=0 >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --exclude "(badpat" abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --include-from /non/exist abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 149 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=binary "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% --binary-files=wrong "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 150 ----------------------------->>testtrygrep +:: The Unix version of this test checks whether locales are supported. On Windows, +:: we assume they always are. 
+set LC_ALL= +set LC_CTYPE=locale.bad +(pushd %srcdir% & %pcre2grep% abc /dev/null & popd) >>testtrygrep 2>&1 +echo RC=^%ERRORLEVEL%>>testtrygrep +set LC_CTYPE= + +echo ---------------------------- Test 151 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% --colour=always -e this -e The -e "The wo" testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 152 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 --group-separator="++" "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 153 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nA3 --no-group-separator "four" ./testdata/grepinputx & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 154 ----------------------------->>testtrygrep +echo. >nul 2>testtemp1grep +(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 155 ----------------------------->>testtrygrep +echo. 
>testtemp1grep +(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 156 ----------------------------->>testtrygrep +%printf% "\n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% --posix-pattern-file --file %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 157 ----------------------------->>testtrygrep +%printf% "spaces \n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% -o --posix-pattern-file --file=%builddir%\testtemp1grep ./testdata/grepinputv >%builddir%\testtemp2grep && %pcre2grep% -q "s " %builddir%\testtemp2grep & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 158 ----------------------------->>testtrygrep +%printf% "spaces.\n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% -f %builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 159 ----------------------------->>testtrygrep +%printf% "spaces.\r\n" >testtemp1grep +(pushd %srcdir% & %pcre2grep% --posix-pattern-file -f%builddir%\testtemp1grep ./testdata/grepinputv & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test 160 ----------------------------->>testtrygrep +(pushd %srcdir% & %pcre2grep% -nC3 "^(ert|jkl)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +(pushd %srcdir% & %pcre2grep% -n -B4 -A2 "^(ert|dfg)" ./testdata/grepinput & popd) >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +:: Now compare the results. 
+ +%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout% +if ERRORLEVEL 1 exit /b 1 + + +:: These tests require UTF-8 support + +if %utf8% neq 0 ( + echo Testing pcre2grep UTF-8 features + + echo ---------------------------- Test U1 ------------------------------>testtrygrep + (pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U2 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U3 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any --allow-lookaround-bsk "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U4 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -u -o "...." ./testdata/grepinputBad8 & popd) >>testtrygrep 2>&1 + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U5 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -U -o "...." 
./testdata/grepinputBad8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U6 ----------------------------->>testtrygrep + (pushd %srcdir% & %pcre2grep% -u -m1 -O "=$x{1d3}$o{744}=" "fox" & popd) <%srcdir%\testdata\grepinputv >>testtrygrep 2>&1 + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U7 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -ui --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U8 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -UiEP --colour=always "k+|\babc\b" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U9 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -u --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + echo ---------------------------- Test U10 ------------------------------>>testtrygrep + (pushd %srcdir% & %pcre2grep% -u --posix-digit --colour=always "A\d" ./testdata/grepinput8 & popd) >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + %cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout% + if ERRORLEVEL 1 exit /b 1 + +) else ( + echo Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library +) + + +:: We go to some contortions to try to ensure that the tests for the various +:: newline settings will work in environments where the normal newline sequence +:: is not \n. Do not use exported files, whose line endings might be changed. +:: Instead, create an input file so that its contents are exactly what we want. +:: These tests are run in the build directory. 
+ +echo Testing pcre2grep newline settings +%printf% "abc\rdef\r\nghi\njkl" >testNinputgrep + +echo ---------------------------- Test N1 ------------------------------>testtrygrep +%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n -N CR "^def" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N2 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N3 ------------------------------>>testtrygrep +for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a +%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N4 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=crlf -F -f %srcdir%\testdata\greppatN4 testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N5 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N6 ------------------------------>>testtrygrep +%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N7 ------------------------------>>testtrygrep +%printf% "xyz\0abc\0def" >testNinputgrep +%pcre2grep% -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% 
>>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep +%pcre2grep% -B1 -na --newline=nul "^(abc|def)" testNinputgrep | %trnull% >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +echo ---------------------------- Test N8 ------------------------------>>testtrygrep +%pcre2grep% -na --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep +echo RC=^%ERRORLEVEL%>>testtrygrep + +%printf% "\n" >>testtrygrep + +%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout% +if ERRORLEVEL 1 exit /b 1 + + +:: These newline tests need UTF support. + +if %utf8% neq 0 ( + echo Testing pcre2grep newline settings with UTF-8 features + + echo ---------------------------- Test UN1 ------------------------------>testtrygrep + %pcre2grep% -nau --newline=anycrlf "^(abc|def)" %srcdir%\testdata\grepinputUN >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + @REM Append to testtrygrep so that the UN1 output is kept for the comparison. + echo ---------------------------- Test UN2 ------------------------------>>testtrygrep + %pcre2grep% -nauU --newline=anycrlf "^a" %srcdir%\testdata\grepinputBad8_Trail >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + %printf% "\n" >>testtrygrep + + %cf% %srcdir%\testdata\grepoutputUN testtrygrep %cfout% + if ERRORLEVEL 1 exit /b 1 + +) else ( + echo Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library +) + + +:: If pcre2grep supports script callouts, run some tests on them. It is possible +:: to restrict these callouts to the non-fork case, either for security, or for +:: environments that do not support fork(). This is handled by comparing to a +:: different output. 
+ +%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported" +if %ERRORLEVEL% equ 0 ( + echo Testing pcre2grep script callouts + + echo --- Test 1 --->testtrygrep + %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 2 --->>testtrygrep + %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 3 --->>testtrygrep + %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 4 --->>testtrygrep + %pcre2grep% "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$1$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 5 --->>testtrygrep + %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 6 --->>testtrygrep + %pcre2grep% -m1 "(T)(?C'|$0:$1:$x{41}$o{101}$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + %pcre2grep% --help | %pcre2grep% -q "Non-fork callout scripts in patterns are supported" + if ^!ERRORLEVEL! equ 0 ( + set nonfork=1 + %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout% + ) else ( + set nonfork=0 + %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout% + ) + if ERRORLEVEL 1 exit /b 1 + + @REM These callout tests need UTF support. 
+ + if %utf8% neq 0 ( + echo Testing pcre2grep script callout with UTF-8 features + + echo --- Test 1 --->testtrygrep + %pcre2grep% -u "(T)(?C'|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + echo --- Test 2 --->>testtrygrep + %pcre2grep% -u "(T)(?C'cscript|//nologo|printf.js|%%s\r\n|$0:$x{a6}$n')" %srcdir%\testdata\grepinputv >>testtrygrep + echo RC=^!ERRORLEVEL!>>testtrygrep + + if ^!nonfork! equ 1 ( + %cf% %srcdir%\testdata\grepoutputCNU testtrygrep %cfout% + ) else ( + %cf% %srcdir%\testdata\grepoutputCU testtrygrep %cfout% + ) + if ERRORLEVEL 1 exit /b 1 + + ) else ( + echo Skipping pcre2grep script callout UTF-8 tests: no UTF-8 support in PCRE2 library + ) + +) else ( + echo Script callouts are not supported +) + + +:: Finally, some tests to exercise code that is not tested above, just to be +:: sure that it runs OK. Doing this improves the coverage statistics. The output +:: is not checked. + +echo Testing miscellaneous pcre2grep arguments (unchecked) +echo. >nul 2>testtrygrep +call :checkspecial "-xxxxx" 2 || exit /b 1 +call :checkspecial "--help" 0 || exit /b 1 +call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1 +call :checkspecial "--line-buffered --color abc nul" 1 || exit /b 1 +call :checkspecial "-dskip abc ." 1 || exit /b 1 +call :checkspecial "-Dread -Dskip abc nul" 1 || exit /b 1 + + +:: Clean up local working files +del testcf printf.js trnull.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep + +exit /b 0 + +:: ------ Function to run and check a special pcre2grep arguments test ------- + +:checkspecial + %pcre2grep% %~1 >>testtrygrep 2>&1 + if %ERRORLEVEL% neq %2 ( + echo ** pcre2grep %~1 failed - check testtrygrep + exit /b 1 + ) + exit /b 0 + +:: End diff --git a/3rd/pcre2/RunTest b/3rd/pcre2/RunTest new file mode 100644 index 00000000..dafef3e2 --- /dev/null +++ b/3rd/pcre2/RunTest @@ -0,0 +1,947 @@ +#! 
/bin/sh + +############################################################################### +# Run the PCRE2 tests using the pcre2test program. The appropriate tests are +# selected, depending on which build-time options were used. +# +# When JIT support is available, all appropriate tests are run with and without +# JIT, unless "-nojit" is given on the command line. There are also two tests +# for JIT-specific features, one to be run when JIT support is available +# (unless "-nojit" is specified), and one when it is not. +# +# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also +# possible to select which to test by giving "-8", "-16" or "-32" on the +# command line. +# +# As well as "-nojit", "-8", "-16", and "-32", arguments for this script are +# individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the +# end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10" +# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests +# except test 10. Whatever order the arguments are in, these tests are always +# run in numerical order. +# +# If no specific tests are selected (which is the case when this script is run +# via 'make check') the default is to run all the numbered tests. +# +# There may also be named (as well as numbered) tests for special purposes. At +# present there is just one, called "heap". This test's output contains the +# sizes of heap frames and frame vectors, which depend on the environment. It +# is therefore not run unless explicitly requested. +# +# Inappropriate tests are automatically skipped (with a comment to say so). For +# example, if JIT support is not compiled, test 16 is skipped, whereas if JIT +# support is compiled, test 15 is skipped. 
+# +# Other arguments can be one of the words "-valgrind", "-valgrind-log", or +# "-sim" followed by an argument to run cross-compiled executables under a +# simulator, for example: +# +# RunTest 3 -sim "qemu-arm -s 8388608" +# +# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may +# be given without the leading "-" character. +# +# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need +# very much more stack than normal. In environments where the stack can be +# set at runtime, -bigstack sets a gigantic stack. +# +# There are two special cases where only one argument is allowed: +# +# If the first and only argument is "ebcdic", the script runs the special +# EBCDIC test that can be useful for checking certain EBCDIC features, even +# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for +# this test to be run. +# +# If the script is obeyed as "RunTest list", a list of available tests is +# output, but none of them are run. +############################################################################### + +# Define test titles in variables so that they can be output as a list. Some +# of them are modified (e.g. with -8 or -16) when used in the actual tests. 
+ +title0="Test 0: Unchecked pcre2test argument tests (to improve coverage)" +title1="Test 1: Main non-UTF, non-UCP functionality (compatible with Perl >= 5.10)" +title2="Test 2: API, errors, internals and non-Perl stuff" +title3="Test 3: Locale-specific features" +title4A="Test 4: UTF" +title4B=" and Unicode property support (compatible with Perl >= 5.10)" +title5A="Test 5: API, internals, and non-Perl stuff for UTF" +title5B=" and UCP support" +title6="Test 6: DFA matching main non-UTF, non-UCP functionality" +title7A="Test 7: DFA matching with UTF" +title7B=" and Unicode property support" +title8="Test 8: Internal offsets and code size tests" +title9="Test 9: Specials for the basic 8-bit library" +title10="Test 10: Specials for the 8-bit library with UTF-8 and UCP support" +title11="Test 11: Specials for the basic 16-bit and 32-bit libraries" +title12="Test 12: Specials for the 16-bit and 32-bit libraries UTF and UCP support" +title13="Test 13: DFA specials for the basic 16-bit and 32-bit libraries" +title14="Test 14: DFA specials for UTF and UCP support" +title15="Test 15: Non-JIT limits and other non-JIT tests" +title16="Test 16: JIT-specific features when JIT is not available" +title17="Test 17: JIT-specific features when JIT is available" +title18="Test 18: Tests of the POSIX interface, excluding UTF/UCP" +title19="Test 19: Tests of the POSIX interface with UTF/UCP" +title20="Test 20: Serialization and code copy tests" +title21="Test 21: \C tests without UTF (supported for DFA matching)" +title22="Test 22: \C tests with UTF (not supported for DFA matching)" +title23="Test 23: \C disabled test" +title24="Test 24: Non-UTF pattern conversion tests" +title25="Test 25: UTF pattern conversion tests" +title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)" +title27="Test 27: Auto-generated unicode property tests" +maxtest=27 +titleheap="Test 'heap': Environment-specific heap tests" + +if [ $# -eq 1 -a "$1" = "list" ]; then + echo $title0 + echo 
$title1 + echo $title2 "(not UTF or UCP)" + echo $title3 + echo $title4A $title4B + echo $title5A $title5B + echo $title6 + echo $title7A $title7B + echo $title8 + echo $title9 + echo $title10 + echo $title11 + echo $title12 + echo $title13 + echo $title14 + echo $title15 + echo $title16 + echo $title17 + echo $title18 + echo $title19 + echo $title20 + echo $title21 + echo $title22 + echo $title23 + echo $title24 + echo $title25 + echo $title26 + echo $title27 + echo "" + echo $titleheap + echo "" + echo "Numbered tests are automatically run if nothing selected." + echo "Named tests must be explicitly selected." + exit 0 +fi + +# Set up a suitable "diff" command for comparison. Some systems +# have a diff that lacks a -u option. Try to deal with this. + +cf="diff" +diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u" + +# Find the test data + +if [ -n "$srcdir" -a -d "$srcdir" ] ; then + testdata="$srcdir/testdata" +elif [ -d "./testdata" ] ; then + testdata=./testdata +elif [ -d "../testdata" ] ; then + testdata=../testdata +else + echo "Cannot find the testdata directory" + exit 1 +fi + + +# ------ Function to check results of a test ------- + +# This function is called with three parameters: +# +# $1 the value of $? after a call to pcre2test +# $2 the suffix of the output file to compare with +# $3 the $opt value (empty, -jit, or -dfa) +# +# Note: must define using name(), not "function name", for Solaris. + +checkresult() + { + if [ $1 -ne 0 ] ; then + echo "** pcre2test failed - check testtry" + exit 1 + fi + case "$3" in + -jit) with=" with JIT";; + -dfa) with=" with DFA";; + *) with="";; + esac + $cf $testdata/testoutput$2 testtry + if [ $? != 0 ] ; then + echo "" + echo "** Test $2 failed$with" + exit 1 + fi + echo " OK$with" + } + + +# ------ Function to run and check a special pcre2test arguments test ------- + +checkspecial() + { + $sim $valgrind $vjs $pcre2test $1 >>testtry + if [ $? 
-ne 0 ] ; then + echo "** pcre2test $1 failed - check testtry" + exit 1 + fi + } + + +# ------ Test setup ------ + +# Default values + +arg8= +arg16= +arg32= +nojit= +bigstack= +sim= +skip= +valgrind= +vjs= +: ${pcre2test:=./pcre2test} + +# This is in case the caller has set aliases (as I do - PH) +unset cp ls mv rm + +if [ ! -x $pcre2test ] ; then + echo "** $pcre2test does not exist or is not executable." + exit 1 +fi + +# Process options and select which tests to run; for those that are explicitly +# requested, check that the necessary optional facilities are available. + +do0=no +do1=no +do2=no +do3=no +do4=no +do5=no +do6=no +do7=no +do8=no +do9=no +do10=no +do11=no +do12=no +do13=no +do14=no +do15=no +do16=no +do17=no +do18=no +do19=no +do20=no +do21=no +do22=no +do23=no +do24=no +do25=no +do26=no +do27=no +doheap=no +doebcdic=no + +while [ $# -gt 0 ] ; do + case $1 in + 0) do0=yes;; + 1) do1=yes;; + 2) do2=yes;; + 3) do3=yes;; + 4) do4=yes;; + 5) do5=yes;; + 6) do6=yes;; + 7) do7=yes;; + 8) do8=yes;; + 9) do9=yes;; + 10) do10=yes;; + 11) do11=yes;; + 12) do12=yes;; + 13) do13=yes;; + 14) do14=yes;; + 15) do15=yes;; + 16) do16=yes;; + 17) do17=yes;; + 18) do18=yes;; + 19) do19=yes;; + 20) do20=yes;; + 21) do21=yes;; + 22) do22=yes;; + 23) do23=yes;; + 24) do24=yes;; + 25) do25=yes;; + 26) do26=yes;; + 27) do27=yes;; + heap) doheap=yes;; + ebcdic) doebcdic=yes;; + -8) arg8=yes;; + -16) arg16=yes;; + -32) arg32=yes;; + bigstack|-bigstack) bigstack=yes;; + nojit|-nojit) nojit=yes;; + sim|-sim) shift; sim=$1;; + valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";; + valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";; + ~*) + if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then + skip="$skip `expr "$1" : '~\([0-9]*\)*$'`" + else + echo "Unknown option or test selector '$1'"; exit 1 + fi + ;; + *-*) + if expr 
"$1" : '[0-9][0-9]*-[0-9]*$' >/dev/null; then + tf=`expr "$1" : '\([0-9]*\)'` + tt=`expr "$1" : '.*-\([0-9]*\)'` + if [ "$tt" = "" ] ; then tt=$maxtest; fi + if expr \( "$tt" ">" "$maxtest" \) >/dev/null; then + echo "Invalid test range '$1'"; exit 1 + fi + while expr "$tf" "<=" "$tt" >/dev/null; do + eval do${tf}=yes + tf=`expr $tf + 1` + done + else + echo "Invalid test range '$1'"; exit 1 + fi + ;; + *) echo "Unknown option or test selector '$1'"; exit 1;; + esac + shift +done + +# Find which optional facilities are available. + +$sim $pcre2test -C linksize >/dev/null +link_size=$? +if [ $link_size -lt 2 ] ; then + echo "RunTest: Failed to find internal link size" + exit 1 +fi +if [ $link_size -gt 4 ] ; then + echo "RunTest: Failed to find internal link size" + exit 1 +fi + +# If it is possible to set the system stack size and -bigstack was given, +# set up a large stack. + +$sim $pcre2test -S 32 /dev/null /dev/null +support_setstack=$? +if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then + setstack="-S 32" +else + setstack="" +fi + +# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only +# one need be. + +$sim $pcre2test -C pcre2-8 >/dev/null +support8=$? +$sim $pcre2test -C pcre2-16 >/dev/null +support16=$? +$sim $pcre2test -C pcre2-32 >/dev/null +support32=$? + +# \C may be disabled + +$sim $pcre2test -C backslash-C >/dev/null +supportBSC=$? 
+ +# Initialize all bitsizes skipped + +test8=skip +test16=skip +test32=skip + +# If no bitsize arguments, select all that are available + +if [ "$arg8$arg16$arg32" = "" ] ; then + if [ $support8 -ne 0 ] ; then + test8=-8 + fi + if [ $support16 -ne 0 ] ; then + test16=-16 + fi + if [ $support32 -ne 0 ] ; then + test32=-32 + fi + +# Otherwise, select requested bit sizes + +else + if [ "$arg8" = yes ] ; then + if [ $support8 -eq 0 ] ; then + echo "Cannot run 8-bit library tests: 8-bit library not compiled" + exit 1 + fi + test8=-8 + fi + if [ "$arg16" = yes ] ; then + if [ $support16 -eq 0 ] ; then + echo "Cannot run 16-bit library tests: 16-bit library not compiled" + exit 1 + fi + test16=-16 + fi + if [ "$arg32" = yes ] ; then + if [ $support32 -eq 0 ] ; then + echo "Cannot run 32-bit library tests: 32-bit library not compiled" + exit 1 + fi + test32=-32 + fi +fi + +# UTF support is implied by Unicode support, and it always applies to all bit +# sizes if both are supported; we can't have UTF-8 support without UTF-16 or +# UTF-32 support. + +$sim $pcre2test -C unicode >/dev/null +utf=$? + +# When JIT is used with valgrind, we need to set up valgrind suppressions as +# otherwise there are a lot of false positive valgrind reports when +# the hardware supports SSE2. + +jitopt= +$sim $pcre2test -C jit >/dev/null +jit=$? +if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then + jitopt=-jit + if [ "$valgrind" != "" ] ; then + vjs="--suppressions=$testdata/valgrind-jit.supp" + fi +fi + +# If no specific tests were requested, select all the numbered tests. Those +# that are not relevant will be automatically skipped. 
+ +if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ + $do4 = no -a $do5 = no -a $do6 = no -a $do7 = no -a \ + $do8 = no -a $do9 = no -a $do10 = no -a $do11 = no -a \ + $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ + $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \ + $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \ + $do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \ + $doheap = no -a $doebcdic = no \ + ]; then + do0=yes + do1=yes + do2=yes + do3=yes + do4=yes + do5=yes + do6=yes + do7=yes + do8=yes + do9=yes + do10=yes + do11=yes + do12=yes + do13=yes + do14=yes + do15=yes + do16=yes + do17=yes + do18=yes + do19=yes + do20=yes + do21=yes + do22=yes + do23=yes + do24=yes + do25=yes + do26=yes + do27=yes +fi + +# Handle any explicit skips at this stage, so that an argument list may consist +# only of explicit skips. + +for i in $skip; do eval do$i=no; done + +# Show which release and which test data + +echo "" +echo PCRE2 C library tests using test data from $testdata +$sim $pcre2test /dev/null +echo "" + + +# ------ Normal Tests ------ + +for bmode in "$test8" "$test16" "$test32"; do + case "$bmode" in + skip) continue;; + -16) if [ "$test8$test32" != "skipskip" ] ; then echo ""; fi + bits=16; echo "---- Testing 16-bit library ----"; echo "";; + -32) if [ "$test8$test16" != "skipskip" ] ; then echo ""; fi + bits=32; echo "---- Testing 32-bit library ----"; echo "";; + -8) bits=8; echo "---- Testing 8-bit library ----"; echo "";; + esac + + # Test 0 is a special test. Its output is not checked, because it will + # be different on different hardware and with different configurations. + # Running this test just exercises the code. 
+ + if [ $do0 = yes ] ; then + echo $title0 + echo '/abc/jit,memory,framesize' >testSinput + echo ' abc' >>testSinput + echo '' >testtry + checkspecial '-C' + checkspecial '--help' + if [ $support_setstack -eq 0 ] ; then + checkspecial '-S 1 -t 10 testSinput' + fi + echo " OK" + fi + + # Primary non-UTF test, compatible with JIT and all versions of Perl >= 5.8 + + if [ $do1 = yes ] ; then + echo $title1 + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput1 testtry + checkresult $? 1 "$opt" + done + fi + + # PCRE2 tests that are not Perl-compatible: API, errors, internals. We copy + # the testbtables file to the current directory for use by this test. + + if [ $do2 = yes ] ; then + echo $title2 "(excluding UTF-$bits)" + cp $testdata/testbtables . + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry + saverc=$? + if [ $saverc = 0 ] ; then + $sim $valgrind ${opt:+$vjs} $pcre2test -q $bmode $opt -error -80,-62,-2,-1,0,100,101,191,300 >>testtry + checkresult $? 2 "$opt" + else + checkresult $saverc 2 "$opt" + fi + done + fi + + # Locale-specific tests, provided that either the "fr_FR", "fr_CA", "french" + # or "fr" locale is available. The first two are Unix-like standards; the + # last two are for Windows. Unfortunately, different versions of the French + # locale give different outputs for some items. This test passes if the + # output matches any one of the alternative output files. + + if [ $do3 = yes ] ; then + locale= + + # In some environments locales that are listed by the "locale -a" + # command do not seem to work with setlocale(). Therefore, we do + # a preliminary test to see if pcre2test can set one before going + # on to use it. + + for loc in 'fr_FR' 'french' 'fr' 'fr_CA'; do + locale -a | grep "^$loc\$" >/dev/null + if [ $? 
-eq 0 ] ; then + echo "/a/locale=$loc" | \ + $sim $valgrind $pcre2test -q $bmode | \ + grep "Failed to set locale" >/dev/null + if [ $? -ne 0 ] ; then + locale=$loc + if [ "$locale" = "fr_FR" ] ; then + infile=$testdata/testinput3 + outfile=$testdata/testoutput3 + outfile2=$testdata/testoutput3A + outfile3=$testdata/testoutput3B + else + infile=test3input + outfile=test3output + outfile2=test3outputA + outfile3=test3outputB + sed "s/fr_FR/$loc/" $testdata/testinput3 >test3input + sed "s/fr_FR/$loc/" $testdata/testoutput3 >test3output + sed "s/fr_FR/$loc/" $testdata/testoutput3A >test3outputA + sed "s/fr_FR/$loc/" $testdata/testoutput3B >test3outputB + fi + break + fi + fi + done + + if [ "$locale" != "" ] ; then + echo $title3 "(using '$locale' locale)" + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $infile testtry + if [ $? = 0 ] ; then + case "$opt" in + -jit) with=" with JIT";; + *) with="";; + esac + if $cf $outfile testtry >teststdout || \ + $cf $outfile2 testtry >teststdout || \ + $cf $outfile3 testtry >teststdout + then + echo " OK$with" + else + echo "** Locale test did not run successfully$with. The output did not match" + echo " $outfile, $outfile2 or $outfile3." + echo " This may mean that there is a problem with the locale settings rather" + echo " than a bug in PCRE2." + exit 1 + fi + else exit 1 + fi + done + else + echo "Cannot test locale-specific features - none of the 'fr_FR', 'fr_CA'," + echo "'fr' or 'french' locales can be set, or the \"locale\" command is" + echo "not available to check for them." + echo " " + fi + fi + + # Tests for UTF and Unicode property support + + if [ $do4 = yes ] ; then + echo ${title4A}-${bits}${title4B} + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput4 testtry + checkresult $? 
4 "$opt" + done + fi + fi + + if [ $do5 = yes ] ; then + echo ${title5A}-${bits}$title5B + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput5 testtry + checkresult $? 5 "$opt" + done + fi + fi + + # Tests for DFA matching support + + if [ $do6 = yes ] ; then + echo $title6 + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput6 testtry + checkresult $? 6 "" + fi + + if [ $do7 = yes ] ; then + echo ${title7A}-${bits}$title7B + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput7 testtry + checkresult $? 7 "" + fi + fi + + # Test of internal offsets and code sizes. This test is run only when there + # is UTF/UCP support. The actual tests are mostly the same as in some of the + # above, but in this test we inspect some offsets and sizes. This is a + # doublecheck for the maintainer, just in case something changes unexpectedly. + # The output from this test is different in 8-bit, 16-bit, and 32-bit modes + # and for different link sizes, so there are different output files for each + # mode and link size. + + if [ $do8 = yes ] ; then + echo $title8 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput8 testtry + checkresult $? 8-$bits-$link_size "" + fi + fi + + # Tests for 8-bit-specific features + + if [ "$do9" = yes ] ; then + echo $title9 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput9 testtry + checkresult $? 
9 "$opt" + done + fi + fi + + # Tests for UTF-8 and UCP 8-bit-specific features + + if [ "$do10" = yes ] ; then + echo $title10 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput10 testtry + checkresult $? 10 "$opt" + done + fi + fi + + # Tests for 16-bit and 32-bit features. Output is different for the two widths. + + if [ $do11 = yes ] ; then + echo $title11 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput11 testtry + checkresult $? 11-$bits "$opt" + done + fi + fi + + # Tests for 16-bit and 32-bit features with UTF-16/32 and UCP support. Output + # is different for the two widths. + + if [ $do12 = yes ] ; then + echo $title12 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput12 testtry + checkresult $? 12-$bits "$opt" + done + fi + fi + + # Tests for 16/32-bit-specific features in DFA non-UTF modes + + if [ $do13 = yes ] ; then + echo $title13 + if [ "$bits" = "8" ] ; then + echo " Skipped when running 8-bit tests" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput13 testtry + checkresult $? 13 "" + fi + fi + + # Tests for DFA UTF and UCP features. Output is different for the different widths. 
+ + if [ $do14 = yes ] ; then + echo $title14 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput14 testtry + checkresult $? 14-$bits "" + fi + fi + + # Test non-JIT match and recursion limits + + if [ $do15 = yes ] ; then + echo $title15 + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput15 testtry + checkresult $? 15 "" + fi + + # Test JIT-specific features when JIT is not available + + if [ $do16 = yes ] ; then + echo $title16 + if [ $jit -ne 0 ] ; then + echo " Skipped because JIT is available" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput16 testtry + checkresult $? 16 "" + fi + fi + + # Test JIT-specific features when JIT is available + + if [ $do17 = yes ] ; then + echo $title17 + if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then + echo " Skipped because JIT is not available or nojit was specified" + else + $sim $valgrind $vjs $pcre2test -q $setstack $bmode $testdata/testinput17 testtry + checkresult $? 17 "" + fi + fi + + # Tests for the POSIX interface without UTF/UCP (8-bit only) + + if [ $do18 = yes ] ; then + echo $title18 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput18 testtry + checkresult $? 18 "" + fi + fi + + # Tests for the POSIX interface with UTF/UCP (8-bit only) + + if [ $do19 = yes ] ; then + echo $title19 + if [ "$bits" = "16" -o "$bits" = "32" ] ; then + echo " Skipped when running 16/32-bit tests" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput19 testtry + checkresult $? 19 "" + fi + fi + + # Serialization tests + + if [ $do20 = yes ] ; then + echo $title20 + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput20 testtry + checkresult $? 
20 "" + fi + + # \C tests without UTF - DFA matching is supported + + if [ "$do21" = yes ] ; then + echo $title21 + if [ $supportBSC -eq 0 ] ; then + echo " Skipped because \C is disabled" + else + for opt in "" $jitopt -dfa; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput21 testtry + checkresult $? 21 "$opt" + done + fi + fi + + # \C tests with UTF - DFA matching is not supported for \C in UTF mode + + if [ "$do22" = yes ] ; then + echo $title22 + if [ $supportBSC -eq 0 ] ; then + echo " Skipped because \C is disabled" + elif [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput22 testtry + checkresult $? 22-$bits "$opt" + done + fi + fi + + # Test when \C is disabled + + if [ "$do23" = yes ] ; then + echo $title23 + if [ $supportBSC -ne 0 ] ; then + echo " Skipped because \C is not disabled" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput23 testtry + checkresult $? 23 "" + fi + fi + + # Non-UTF pattern conversion tests + + if [ "$do24" = yes ] ; then + echo $title24 + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput24 testtry + checkresult $? 24 "" + fi + + # UTF pattern conversion tests + + if [ "$do25" = yes ] ; then + echo $title25 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinput25 testtry + checkresult $? 25 "" + fi + fi + + # Unicode property tests + + if [ $do26 = yes ] ; then + echo $title26 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry + checkresult $? 
26 "$opt" + done + fi + fi + + # Auto-generated Unicode property tests + + if [ $do27 = yes ] ; then + echo $title27 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} $pcre2test -q $setstack $bmode $opt $testdata/testinput27 testtry + checkresult $? 27 "$opt" + done + fi + fi + + # Manually selected heap tests - output may vary in different environments, + # which is why they are not automatically run. + + if [ $doheap = yes ] ; then + echo $titleheap + $sim $valgrind $pcre2test -q $setstack $bmode $testdata/testinputheap testtry + checkresult $? heap-$bits "" + fi + +# End of loop for 8/16/32-bit tests +done + + +# ------ Special EBCDIC Test ------- + +if [ $doebcdic = yes ] ; then + $sim $valgrind $pcre2test -C ebcdic >/dev/null + ebcdic=$? + if [ $ebcdic -ne 1 ] ; then + echo "Cannot run EBCDIC tests: EBCDIC support not compiled" + exit 1 + fi + for opt in "" "-dfa"; do + $sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry + checkresult $? EBC "$opt" + done +fi + + +# Clean up local working files +rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry + +# End diff --git a/3rd/pcre2/RunTest.bat b/3rd/pcre2/RunTest.bat new file mode 100644 index 00000000..500b0f4d --- /dev/null +++ b/3rd/pcre2/RunTest.bat @@ -0,0 +1,564 @@ +@echo off +@rem +@rem MS Windows batch file to run pcre2test on testfiles with the correct +@rem options. This file must use CRLF linebreaks to function properly, +@rem and requires both pcre2test and pcre2grep. +@rem +@rem ------------------------ HISTORY ---------------------------------- +@rem This file was originally contributed to PCRE1 by Ralf Junker, and touched +@rem up by Daniel Richard G. Tests 10-12 added by Philip H. +@rem Philip H also changed test 3 to use "wintest" files. 
+@rem +@rem Updated by Tom Fortmann to support explicit test numbers on the command +@rem line. Added argument validation and added error reporting. +@rem +@rem Sheri Pierce added logic to skip feature dependent tests +@rem tests 4 5 7 10 12 14 19 22 25 and 26 require Unicode support +@rem 8 requires Unicode and link size 2 +@rem 16 requires absence of jit support +@rem 17 requires presence of jit support +@rem Sheri P also added override tests for study and jit testing +@rem Zoltan Herczeg added libpcre16 support +@rem Zoltan Herczeg added libpcre32 support +@rem ------------------------------------------------------------------- +@rem +@rem The file was converted for PCRE2 by PH, February 2015. +@rem Updated for new test 14 (moving others up a number), August 2015. +@rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015. +@rem PH added missing "set type" for test 22, April 2016. +@rem PH added copy command for new testbtables file, November 2020 +@rem PH caused it to show comparison output when comparison failed, July 2023 +@rem PH updated unknown error number in test + + +setlocal enabledelayedexpansion +if [%srcdir%]==[] ( +if exist testdata\ set srcdir=.) +if [%srcdir%]==[] ( +if exist ..\testdata\ set srcdir=..) +if [%srcdir%]==[] ( +if exist ..\..\testdata\ set srcdir=..\..) +if NOT exist %srcdir%\testdata\ ( +echo Error: distribution testdata folder not found! +call :conferror +exit /b 1 +goto :eof +) + +if [%pcre2test%]==[] set pcre2test=.\pcre2test.exe + +echo source dir is %srcdir% +echo pcre2test=%pcre2test% + +if NOT exist %pcre2test% ( +echo Error: %pcre2test% not found! +echo. 
+call :conferror +exit /b 1 +) + +%pcre2test% -C linksize >NUL +set link_size=%ERRORLEVEL% +%pcre2test% -C pcre2-8 >NUL +set support8=%ERRORLEVEL% +%pcre2test% -C pcre2-16 >NUL +set support16=%ERRORLEVEL% +%pcre2test% -C pcre2-32 >NUL +set support32=%ERRORLEVEL% +%pcre2test% -C unicode >NUL +set unicode=%ERRORLEVEL% +%pcre2test% -C jit >NUL +set jit=%ERRORLEVEL% +%pcre2test% -C backslash-C >NUL +set supportBSC=%ERRORLEVEL% + +if %support8% EQU 1 ( +if not exist testout8 md testout8 +if not exist testoutjit8 md testoutjit8 +) + +if %support16% EQU 1 ( +if not exist testout16 md testout16 +if not exist testoutjit16 md testoutjit16 +) + +if %support32% EQU 1 ( +if not exist testout32 md testout32 +if not exist testoutjit32 md testoutjit32 +) + +set do1=no +set do2=no +set do3=no +set do4=no +set do5=no +set do6=no +set do7=no +set do8=no +set do9=no +set do10=no +set do11=no +set do12=no +set do13=no +set do14=no +set do15=no +set do16=no +set do17=no +set do18=no +set do19=no +set do20=no +set do21=no +set do22=no +set do23=no +set do24=no +set do25=no +set do26=no +set do27=no +set all=yes + +for %%a in (%*) do ( + set valid=no + for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27) do if %%v == %%a set valid=yes + if "!valid!" == "yes" ( + set do%%a=yes + set all=no + ) else ( + echo Invalid test number - %%a! + echo Usage %0 [ test_number ] ... + echo Where test_number is one or more optional test numbers 1 through 27, default is all tests. 
+ exit /b 1 + ) +) +set failed="no" + +if "%all%" == "yes" ( + set do1=yes + set do2=yes + set do3=yes + set do4=yes + set do5=yes + set do6=yes + set do7=yes + set do8=yes + set do9=yes + set do10=yes + set do11=yes + set do12=yes + set do13=yes + set do14=yes + set do15=yes + set do16=yes + set do17=yes + set do18=yes + set do19=yes + set do20=yes + set do21=yes + set do22=yes + set do23=yes + set do24=yes + set do25=yes + set do26=yes + set do27=yes +) + +@echo RunTest.bat's pcre2test output is written to newly created subfolders +@echo named testout{8,16,32} and testoutjit{8,16,32}. +@echo. + +set mode= +set bits=8 + +:nextMode +if "%mode%" == "" ( + if %support8% EQU 0 goto modeSkip + echo. + echo ---- Testing 8-bit library ---- + echo. +) +if "%mode%" == "-16" ( + if %support16% EQU 0 goto modeSkip + echo. + echo ---- Testing 16-bit library ---- + echo. +) +if "%mode%" == "-32" ( + if %support32% EQU 0 goto modeSkip + echo. + echo ---- Testing 32-bit library ---- + echo. +) +if "%do1%" == "yes" call :do1 +if "%do2%" == "yes" call :do2 +if "%do3%" == "yes" call :do3 +if "%do4%" == "yes" call :do4 +if "%do5%" == "yes" call :do5 +if "%do6%" == "yes" call :do6 +if "%do7%" == "yes" call :do7 +if "%do8%" == "yes" call :do8 +if "%do9%" == "yes" call :do9 +if "%do10%" == "yes" call :do10 +if "%do11%" == "yes" call :do11 +if "%do12%" == "yes" call :do12 +if "%do13%" == "yes" call :do13 +if "%do14%" == "yes" call :do14 +if "%do15%" == "yes" call :do15 +if "%do16%" == "yes" call :do16 +if "%do17%" == "yes" call :do17 +if "%do18%" == "yes" call :do18 +if "%do19%" == "yes" call :do19 +if "%do20%" == "yes" call :do20 +if "%do21%" == "yes" call :do21 +if "%do22%" == "yes" call :do22 +if "%do23%" == "yes" call :do23 +if "%do24%" == "yes" call :do24 +if "%do25%" == "yes" call :do25 +if "%do26%" == "yes" call :do26 +if "%do27%" == "yes" call :do27 +:modeSkip +if "%mode%" == "" ( + set mode=-16 + set bits=16 + goto nextMode +) +if "%mode%" == "-16" ( + set mode=-32 + set 
bits=32 + goto nextMode +) + +@rem If mode is -32, testing is finished +if %failed% == "yes" ( +echo In above output, one or more of the various tests failed! +exit /b 1 +) +echo All OK +goto :eof + +:runsub +@rem Function to execute pcre2test and compare the output +@rem Arguments are as follows: +@rem +@rem 1 = test number +@rem 2 = outputdir +@rem 3 = test name use double quotes +@rem 4 - 9 = pcre2test options + +if [%1] == [] ( + echo Missing test number argument! + exit /b 1 +) + +if [%2] == [] ( + echo Missing outputdir! + exit /b 1 +) + +if [%3] == [] ( + echo Missing test name argument! + exit /b 1 +) + +if %1 == 8 ( + set outnum=%1-%bits%-%link_size% +) else if %1 == 11 ( + set outnum=%1-%bits% +) else if %1 == 12 ( + set outnum=%1-%bits% +) else if %1 == 14 ( + set outnum=%1-%bits% +) else if %1 == 22 ( + set outnum=%1-%bits% +) else ( + set outnum=%1 +) +set testinput=testinput%1 +set testoutput=testoutput%outnum% +if exist %srcdir%\testdata\win%testinput% ( + set testinput=wintestinput%1 + set testoutput=wintestoutput%outnum% +) + +echo Test %1: %3 +%pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput% +if errorlevel 1 ( + echo. failed executing command-line: + echo. %pcre2test% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput% + set failed="yes" + goto :eof +) else if [%1]==[2] ( + %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -80,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput% +) + +fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% >NUL + +if errorlevel 1 ( + echo. failed comparison: fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% + if [%1]==[3] ( + echo. + echo ** Test 3 failure usually means french locale is not + echo ** available on the system, rather than a bug or problem with PCRE2. + echo. + goto :eof +) + fc /n %srcdir%\testdata\%testoutput% %2%bits%\%testoutput% + + set failed="yes" + goto :eof +) + +echo. Passed. 
+goto :eof + +:do1 +call :runsub 1 testout "Main non-UTF, non-UCP functionality (Compatible with Perl >= 5.10)" -q +if %jit% EQU 1 call :runsub 1 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do2 + copy /y %srcdir%\testdata\testbtables testbtables + call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q + if %jit% EQU 1 call :runsub 2 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do3 + call :runsub 3 testout "Locale-specific features" -q + if %jit% EQU 1 call :runsub 3 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do4 +if %unicode% EQU 0 ( + echo Test 4 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 4 testout "UTF-%bits% and Unicode property support - (Compatible with Perl >= 5.10)" -q + if %jit% EQU 1 call :runsub 4 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do5 +if %unicode% EQU 0 ( + echo Test 5 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits% and UCP" -q + if %jit% EQU 1 call :runsub 5 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do6 + call :runsub 6 testout "DFA matching main non-UTF, non-UCP functionality" -q +goto :eof + +:do7 +if %unicode% EQU 0 ( + echo Test 7 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 7 testout "DFA matching with UTF-%bits% and Unicode property support" -q + goto :eof + +:do8 +if NOT %link_size% EQU 2 ( + echo Test 8 Skipped because link size is not 2. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 8 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 8 testout "Internal offsets and code size tests" -q +goto :eof + +:do9 +if NOT %bits% EQU 8 ( + echo Test 9 Skipped when running 16/32-bit tests. 
+ goto :eof +) + call :runsub 9 testout "Specials for the basic 8-bit library" -q + if %jit% EQU 1 call :runsub 9 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do10 +if NOT %bits% EQU 8 ( + echo Test 10 Skipped when running 16/32-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 10 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 10 testout "Specials for the 8-bit library with Unicode support" -q + if %jit% EQU 1 call :runsub 10 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do11 +if %bits% EQU 8 ( + echo Test 11 Skipped when running 8-bit tests. + goto :eof +) + call :runsub 11 testout "Specials for the basic 16/32-bit library" -q + if %jit% EQU 1 call :runsub 11 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do12 +if %bits% EQU 8 ( + echo Test 12 Skipped when running 8-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 12 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 12 testout "Specials for the 16/32-bit library with Unicode support" -q + if %jit% EQU 1 call :runsub 12 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do13 +if %bits% EQU 8 ( + echo Test 13 Skipped when running 8-bit tests. + goto :eof +) + call :runsub 13 testout "DFA specials for the basic 16/32-bit library" -q +goto :eof + +:do14 +if %unicode% EQU 0 ( + echo Test 14 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 14 testout "DFA specials for UTF and UCP support" -q + goto :eof + +:do15 +call :runsub 15 testout "Non-JIT limits and other non_JIT tests" -q +goto :eof + +:do16 +if %jit% EQU 1 ( + echo Test 16 Skipped due to presence of JIT support. + goto :eof +) + call :runsub 16 testout "JIT-specific features when JIT is not available" -q +goto :eof + +:do17 +if %jit% EQU 0 ( + echo Test 17 Skipped due to absence of JIT support. 
+ goto :eof +) + call :runsub 17 testout "JIT-specific features when JIT is available" -q +goto :eof + +:do18 +if %bits% EQU 16 ( + echo Test 18 Skipped when running 16-bit tests. + goto :eof +) +if %bits% EQU 32 ( + echo Test 18 Skipped when running 32-bit tests. + goto :eof +) + call :runsub 18 testout "POSIX interface, excluding UTF-8 and UCP" -q +goto :eof + +:do19 +if %bits% EQU 16 ( + echo Test 19 Skipped when running 16-bit tests. + goto :eof +) +if %bits% EQU 32 ( + echo Test 19 Skipped when running 32-bit tests. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 19 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 19 testout "POSIX interface with UTF-8 and UCP" -q +goto :eof + +:do20 +call :runsub 20 testout "Serialization tests" -q +goto :eof + +:do21 +if %supportBSC% EQU 0 ( + echo Test 21 Skipped due to absence of backslash-C support. + goto :eof +) + call :runsub 21 testout "Backslash-C tests without UTF" -q + call :runsub 21 testout "Backslash-C tests without UTF (DFA)" -q -dfa + if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do22 +if %supportBSC% EQU 0 ( + echo Test 22 Skipped due to absence of backslash-C support. + goto :eof +) +if %unicode% EQU 0 ( + echo Test 22 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 22 testout "Backslash-C tests with UTF" -q + if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do23 +if %supportBSC% EQU 1 ( + echo Test 23 Skipped due to presence of backslash-C support. + goto :eof +) + call :runsub 23 testout "Backslash-C disabled test" -q +goto :eof + +:do24 +call :runsub 24 testout "Non-UTF pattern conversion tests" -q +goto :eof + +:do25 +if %unicode% EQU 0 ( + echo Test 25 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 25 testout "UTF pattern conversion tests" -q +goto :eof + +:do26 +if %unicode% EQU 0 ( + echo Test 26 Skipped due to absence of Unicode support. 
+ goto :eof +) + call :runsub 26 testout "Unicode property tests (Compatible with Perl >= 5.38)" -q + if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:do27 +if %unicode% EQU 0 ( + echo Test 27 Skipped due to absence of Unicode support. + goto :eof +) + call :runsub 27 testout "Auto-generated unicode property tests" -q + if %jit% EQU 1 call :runsub 27 testoutjit "Test with JIT Override" -q -jit +goto :eof + +:conferror +@echo. +@echo Either your build is incomplete or you have a configuration error. +@echo. +@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS" +@echo project, pcre2_test.bat defines variables and automatically calls RunTest.bat. +@echo For manual testing of all available features, after configuring with cmake +@echo and building, you can run the built pcre2_test.bat. For best results with +@echo cmake builds and tests avoid directories with full path names that include +@echo spaces for source or build. +@echo. +@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed +@echo for input and verification should be found automatically when (from the +@echo location of the built exes) you call RunTest.bat. By default RunTest.bat +@echo runs all tests compatible with the linked pcre2 library but it can be given +@echo a test number as an argument. +@echo. +@echo If the build dir is not under the source dir you can either copy your exes +@echo to the source folder or copy RunTest.bat and the testdata folder to the +@echo location of your built exes and then run RunTest.bat. +@echo. +goto :eof diff --git a/3rd/pcre2/SECURITY.md b/3rd/pcre2/SECURITY.md new file mode 100644 index 00000000..1e3a05b9 --- /dev/null +++ b/3rd/pcre2/SECURITY.md @@ -0,0 +1,44 @@ +# Security policies + +## Release security + +The PCRE2 project provides source-only releases, with no binaries. 
+ +These source releases can be downloaded from the +[GitHub Releases](https://github.com/PCRE2Project/pcre2/releases) page. Each +release file is GPG-signed. + +* Releases up to and including 10.44 are signed by Philip Hazel (GPG key: + 45F68D54BBE23FB3039B46E59766E084FB0F43D8) +* Releases from 10.45 onwards will be signed by Nicholas Wilson (GPG key: + A95536204A3BB489715231282A98E77EB6F24CA8, cross-signed by Philip + Hazel's key for release continuity) + +From releases 10.45 onwards, the source code will additionally be provided via +Git checkout of the (GPG-signed) release tag. + +Please contact the maintainers for any queries about release integrity or the +project's supply-chain. + +## Reporting vulnerabilities + +The PCRE2 project prioritises security. We appreciate third-party testing and +security research, and would be grateful if you could responsibly disclose your +findings to us. We will make every effort to acknowledge your contributions. + +To report a security issue, please use the GitHub Security Advisory +["Report a Vulnerability"](https://github.com/PCRE2Project/pcre2/security/advisories/new) +tab. (Alternatively, if you prefer you may send a GPG-encrypted email to one of +the maintainers.) + +### Timeline + +As a very small volunteer team, we cannot guarantee rapid response, but would +aim to respond within 1 week, or perhaps 2 during holidays. + +### Response procedure + +PCRE2 has never previously made a rapid or embargoed release in response to a +security incident. We would work with security managers from trusted downstream +distributors, such as major Linux distributions, before disclosing the +vulnerability publicly. 
diff --git a/3rd/pcre2/WORKSPACE.bazel b/3rd/pcre2/WORKSPACE.bazel new file mode 100644 index 00000000..4ce2c8ce --- /dev/null +++ b/3rd/pcre2/WORKSPACE.bazel @@ -0,0 +1 @@ +# See MODULE.bazel diff --git a/3rd/pcre2/aclocal.m4 b/3rd/pcre2/aclocal.m4 new file mode 100644 index 00000000..d7c332c2 --- /dev/null +++ b/3rd/pcre2/aclocal.m4 @@ -0,0 +1,1597 @@ +# generated automatically by aclocal 1.16.5 -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. + +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],, +[m4_warning([this file was generated for autoconf 2.71. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically 'autoreconf'.])]) + +# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- +# serial 12 (pkg-config-0.29.2) + +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. 
+dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. + +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. +dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. +m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29.2]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl first found in the path. Checks that the version of pkg-config found +dnl is at least MIN-VERSION. 
If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. +AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) +m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility]) +AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path]) +AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path]) + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi +fi[]dnl +])dnl PKG_PROG_PKG_CONFIG + +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl +dnl Check to see whether a particular set of modules exists. Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. 
+dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurrence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if", you +dnl have to call PKG_CHECK_EXISTS manually +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_default([$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. +m4_define([_PKG_CONFIG], +[if test -n "$$1"; then + pkg_cv_[]$1="$$1" + elif test -n "$PKG_CONFIG"; then + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes ], + [pkg_failed=yes]) + else + pkg_failed=untried +fi[]dnl +])dnl _PKG_CONFIG + +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors. 
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])dnl _PKG_SHORT_ERRORS_SUPPORTED + + +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $2]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + AC_MSG_RESULT([no]) + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + m4_default([$4], [AC_MSG_ERROR( +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT])[]dnl + ]) +elif test $pkg_failed = untried; then + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( +[The pkg-config script could not be found or is too old. 
Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see .])[]dnl + ]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + $3 +fi[]dnl +])dnl PKG_CHECK_MODULES + + +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac. +AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter. 
+AC_DEFUN([PKG_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([pkgconfigdir], + [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],, + [with_pkgconfigdir=]pkg_default) +AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_INSTALLDIR + + +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. The user can override through the +dnl --with-noarch-pkgconfigdir parameter. +AC_DEFUN([PKG_NOARCH_INSTALLDIR], +[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) +m4_pushdef([pkg_description], + [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@]) +AC_ARG_WITH([noarch-pkgconfigdir], + [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],, + [with_noarch_pkgconfigdir=]pkg_default) +AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) +m4_popdef([pkg_default]) +m4_popdef([pkg_description]) +])dnl PKG_NOARCH_INSTALLDIR + + +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. 
+AC_DEFUN([PKG_CHECK_VAR], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl + +_PKG_CONFIG([$1], [variable="][$3]["], [$2]) +AS_VAR_COPY([$1], [pkg_cv_][$1]) + +AS_VAR_IF([$1], [""], [$5], [$4])dnl +])dnl PKG_CHECK_VAR + +dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND], +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------ +dnl +dnl Prepare a "--with-" configure option using the lowercase +dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and +dnl PKG_CHECK_MODULES in a single macro. +AC_DEFUN([PKG_WITH_MODULES], +[ +m4_pushdef([with_arg], m4_tolower([$1])) + +m4_pushdef([description], + [m4_default([$5], [build with ]with_arg[ support])]) + +m4_pushdef([def_arg], [m4_default([$6], [auto])]) +m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes]) +m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no]) + +m4_case(def_arg, + [yes],[m4_pushdef([with_without], [--without-]with_arg)], + [m4_pushdef([with_without],[--with-]with_arg)]) + +AC_ARG_WITH(with_arg, + AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),, + [AS_TR_SH([with_]with_arg)=def_arg]) + +AS_CASE([$AS_TR_SH([with_]with_arg)], + [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)], + [auto],[PKG_CHECK_MODULES([$1],[$2], + [m4_n([def_action_if_found]) $3], + [m4_n([def_action_if_not_found]) $4])]) + +m4_popdef([with_arg]) +m4_popdef([description]) +m4_popdef([def_arg]) + +])dnl PKG_WITH_MODULES + +dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ----------------------------------------------- +dnl +dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES +dnl check. HAVE_[VARIABLE-PREFIX] is exported as make variable.
+AC_DEFUN([PKG_HAVE_WITH_MODULES], +[ +PKG_WITH_MODULES([$1],[$2],,,[$3],[$4]) + +AM_CONDITIONAL([HAVE_][$1], + [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"]) +])dnl PKG_HAVE_WITH_MODULES + +dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES, +dnl [DESCRIPTION], [DEFAULT]) +dnl ------------------------------------------------------ +dnl +dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after +dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make +dnl and preprocessor variable. +AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES], +[ +PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4]) + +AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"], + [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])]) +])dnl PKG_HAVE_DEFINE_WITH_MODULES + +# Copyright (C) 2002-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_AUTOMAKE_VERSION(VERSION) +# ---------------------------- +# Automake X.Y traces this macro to ensure aclocal.m4 has been +# generated from the m4 files accompanying Automake X.Y. +# (This private macro should not be called outside this file.) +AC_DEFUN([AM_AUTOMAKE_VERSION], +[am__api_version='1.16' +dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to +dnl require some minimum version. Point them to the right macro. +m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl +]) + +# _AM_AUTOCONF_VERSION(VERSION) +# ----------------------------- +# aclocal traces this macro to find the Autoconf version. +# This is a private macro too. Using m4_define simplifies +# the logic in aclocal, which can simply ignore this definition. 
+m4_define([_AM_AUTOCONF_VERSION], []) + +# AM_SET_CURRENT_AUTOMAKE_VERSION +# ------------------------------- +# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced. +# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. +AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +[AM_AUTOMAKE_VERSION([1.16.5])dnl +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) + +# Copyright (C) 2011-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_AR([ACT-IF-FAIL]) +# ------------------------- +# Try to determine the archiver interface, and trigger the ar-lib wrapper +# if it is needed. If the detection of archiver interface fails, run +# ACT-IF-FAIL (default is to abort configure with a proper error message). +AC_DEFUN([AM_PROG_AR], +[AC_BEFORE([$0], [LT_INIT])dnl +AC_BEFORE([$0], [AC_PROG_LIBTOOL])dnl +AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([ar-lib])dnl +AC_CHECK_TOOLS([AR], [ar lib "link -lib"], [false]) +: ${AR=ar} + +AC_CACHE_CHECK([the archiver ($AR) interface], [am_cv_ar_interface], + [AC_LANG_PUSH([C]) + am_cv_ar_interface=ar + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[int some_variable = 0;]])], + [am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([am_ar_try]) + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + ]) + AC_LANG_POP([C])]) + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script.
+ # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + m4_default([$1], + [AC_MSG_ERROR([could not determine $AR interface])]) + ;; +esac +AC_SUBST([AR])dnl +]) + +# AM_AUX_DIR_EXPAND -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets +# $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to +# '$srcdir', '$srcdir/..', or '$srcdir/../..'. +# +# Of course, Automake must honor this variable whenever it calls a +# tool from the auxiliary directory. The problem is that $srcdir (and +# therefore $ac_aux_dir as well) can be either absolute or relative, +# depending on how configure is run. This is pretty annoying, since +# it makes $ac_aux_dir quite unusable in subdirectories: in the top +# source directory, any form will work fine, but in subdirectories a +# relative path needs to be adjusted first. +# +# $ac_aux_dir/missing +# fails when called from a subdirectory if $ac_aux_dir is relative +# $top_srcdir/$ac_aux_dir/missing +# fails if $ac_aux_dir is absolute, +# fails when called from a subdirectory in a VPATH build with +# a relative $ac_aux_dir +# +# The reason of the latter failure is that $top_srcdir and $ac_aux_dir +# are both prefixed by $srcdir. In an in-source build this is usually +# harmless because $srcdir is '.', but things will break when you +# start a VPATH build or use an absolute $srcdir.
+# +# So we could use something similar to $top_srcdir/$ac_aux_dir/missing, +# iff we strip the leading $srcdir from $ac_aux_dir. That would be: +# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` +# and then we would define $MISSING as +# MISSING="\${SHELL} $am_aux_dir/missing" +# This will work as long as MISSING is not called from configure, because +# unfortunately $(top_srcdir) has no meaning in configure. +# However there are other variables, like CC, which are often used in +# configure, and could therefore not use this "fixed" $ac_aux_dir. +# +# Another solution, used here, is to always expand $ac_aux_dir to an +# absolute PATH. The drawback is that using absolute paths prevent a +# configured tree to be moved without reconfiguration. + +AC_DEFUN([AM_AUX_DIR_EXPAND], +[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` +]) + +# AM_CONDITIONAL -*- Autoconf -*- + +# Copyright (C) 1997-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_CONDITIONAL(NAME, SHELL-CONDITION) +# ------------------------------------- +# Define a conditional. +AC_DEFUN([AM_CONDITIONAL], +[AC_PREREQ([2.52])dnl + m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], + [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl +AC_SUBST([$1_TRUE])dnl +AC_SUBST([$1_FALSE])dnl +_AM_SUBST_NOTMAKE([$1_TRUE])dnl +_AM_SUBST_NOTMAKE([$1_FALSE])dnl +m4_define([_AM_COND_VALUE_$1], [$2])dnl +if $2; then + $1_TRUE= + $1_FALSE='#' +else + $1_TRUE='#' + $1_FALSE= +fi +AC_CONFIG_COMMANDS_PRE( +[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then + AC_MSG_ERROR([[conditional "$1" was never defined. 
+Usually this means the macro was only invoked conditionally.]]) +fi])]) + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + + +# There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be +# written in clear, in which case automake, when reading aclocal.m4, +# will think it sees a *use*, and therefore will trigger all its +# C support machinery. Also note that it means that autoscan, seeing +# CC etc. in the Makefile, will ask for an AC_PROG_CC use... + + +# _AM_DEPENDENCIES(NAME) +# ---------------------- +# See how the compiler implements dependency checking. +# NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GCJ". +# We try a few techniques and use that to set a single cache variable. +# +# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was +# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular +# dependency, and given that the user is not expected to run this macro, +# just rely on AC_PROG_CC. +AC_DEFUN([_AM_DEPENDENCIES], +[AC_REQUIRE([AM_SET_DEPDIR])dnl +AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl +AC_REQUIRE([AM_MAKE_INCLUDE])dnl +AC_REQUIRE([AM_DEP_TRACK])dnl + +m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], + [$1], [CXX], [depcc="$CXX" am_compiler_list=], + [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], + [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], + [$1], [UPC], [depcc="$UPC" am_compiler_list=], + [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], + [depcc="$$1" am_compiler_list=]) + +AC_CACHE_CHECK([dependency style of $depcc], + [am_cv_$1_dependencies_compiler_type], +[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove.
For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. + cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_$1_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` + fi + am__universal=false + m4_case([$1], [CC], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac], + [CXX], + [case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac]) + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. 
It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. + if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_$1_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_$1_dependencies_compiler_type=none +fi +]) +AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) +AM_CONDITIONAL([am__fastdep$1], [ + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) +]) + + +# AM_SET_DEPDIR +# ------------- +# Choose a directory name for dependency files. +# This macro is AC_REQUIREd in _AM_DEPENDENCIES. +AC_DEFUN([AM_SET_DEPDIR], +[AC_REQUIRE([AM_SET_LEADING_DOT])dnl +AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl +]) + + +# AM_DEP_TRACK +# ------------ +AC_DEFUN([AM_DEP_TRACK], +[AC_ARG_ENABLE([dependency-tracking], [dnl +AS_HELP_STRING( + [--enable-dependency-tracking], + [do not reject slow dependency extractors]) +AS_HELP_STRING( + [--disable-dependency-tracking], + [speeds up one-time build])]) +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi +AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) +AC_SUBST([AMDEPBACKSLASH])dnl +_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl +AC_SUBST([am__nodep])dnl +_AM_SUBST_NOTMAKE([am__nodep])dnl +]) + +# Generate code to set up dependency tracking. -*- Autoconf -*- + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_OUTPUT_DEPENDENCY_COMMANDS +# ------------------------------ +AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], +[{ + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. 
+ AS_CASE([$CONFIG_FILES], + [*\'*], [eval set x "$CONFIG_FILES"], + [*], [set x $CONFIG_FILES]) + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all seds we know understand at least 4000. + sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`AS_DIRNAME(["$am_mf"])` + am_filepart=`AS_BASENAME(["$am_mf"])` + AM_RUN_LOG([cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles]) || am_rc=$? + done + if test $am_rc -ne 0; then + AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE="gmake" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking).]) + fi + AS_UNSET([am_dirpart]) + AS_UNSET([am_filepart]) + AS_UNSET([am_mf]) + AS_UNSET([am_rc]) + rm -f conftest-deps.mk +} +])# _AM_OUTPUT_DEPENDENCY_COMMANDS + + +# AM_OUTPUT_DEPENDENCY_COMMANDS +# ----------------------------- +# This macro should only be invoked once -- use via AC_REQUIRE. +# +# This code is only required when automatic dependency tracking is enabled. +# This creates each '.Po' and '.Plo' makefile fragment that we'll need in +# order to bootstrap the dependency handling code.
+AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], +[AC_CONFIG_COMMANDS([depfiles], + [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], + [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])]) + +# Do all the work for Automake. -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This macro actually does too much. Some checks are only needed if +# your package does certain things. But this isn't really a big deal. + +dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. +m4_define([AC_PROG_CC], +m4_defn([AC_PROG_CC]) +[_AM_PROG_CC_C_O +]) + +# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) +# AM_INIT_AUTOMAKE([OPTIONS]) +# ----------------------------------------------- +# The call with PACKAGE and VERSION arguments is the old style +# call (pre autoconf-2.50), which is being phased out. PACKAGE +# and VERSION should now be passed to AC_INIT and removed from +# the call to AM_INIT_AUTOMAKE. +# We support both call styles for the transition. After +# the next Automake release, Autoconf can make the AC_INIT +# arguments mandatory, and then we can depend on a new Autoconf +# release and drop the old call support. +AC_DEFUN([AM_INIT_AUTOMAKE], +[AC_PREREQ([2.65])dnl +m4_ifdef([_$0_ALREADY_INIT], + [m4_fatal([$0 expanded multiple times +]m4_defn([_$0_ALREADY_INIT]))], + [m4_define([_$0_ALREADY_INIT], m4_expansion_stack)])dnl +dnl Autoconf wants to disallow AM_ names. We explicitly allow +dnl the ones we care about. +m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl +AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl +AC_REQUIRE([AC_PROG_INSTALL])dnl +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." 
+ AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi +AC_SUBST([CYGPATH_W]) + +# Define the identity of the package. +dnl Distinguish between old-style and new-style calls. +m4_ifval([$2], +[AC_DIAGNOSE([obsolete], + [$0: two- and three-arguments forms are deprecated.]) +m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl + AC_SUBST([PACKAGE], [$1])dnl + AC_SUBST([VERSION], [$2])], +[_AM_SET_OPTIONS([$1])dnl +dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. +m4_if( + m4_ifset([AC_PACKAGE_NAME], [ok]):m4_ifset([AC_PACKAGE_VERSION], [ok]), + [ok:ok],, + [m4_fatal([AC_INIT should be called with package and version arguments])])dnl + AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl + AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl + +_AM_IF_OPTION([no-define],, +[AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) + AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl + +# Some tools Automake needs. +AC_REQUIRE([AM_SANITY_CHECK])dnl +AC_REQUIRE([AC_ARG_PROGRAM])dnl +AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) +AM_MISSING_PROG([AUTOCONF], [autoconf]) +AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) +AM_MISSING_PROG([AUTOHEADER], [autoheader]) +AM_MISSING_PROG([MAKEINFO], [makeinfo]) +AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl +AC_REQUIRE([AC_PROG_MKDIR_P])dnl +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# +# +AC_SUBST([mkdir_p], ['$(MKDIR_P)']) +# We need awk for the "check" target (and possibly the TAP driver). 
The +# system "awk" is bad on some platforms. +AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([AC_PROG_MAKE_SET])dnl +AC_REQUIRE([AM_SET_LEADING_DOT])dnl +_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], + [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], + [_AM_PROG_TAR([v7])])]) +_AM_IF_OPTION([no-dependencies],, +[AC_PROVIDE_IFELSE([AC_PROG_CC], + [_AM_DEPENDENCIES([CC])], + [m4_define([AC_PROG_CC], + m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_CXX], + [_AM_DEPENDENCIES([CXX])], + [m4_define([AC_PROG_CXX], + m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJC], + [_AM_DEPENDENCIES([OBJC])], + [m4_define([AC_PROG_OBJC], + m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl +AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], + [_AM_DEPENDENCIES([OBJCXX])], + [m4_define([AC_PROG_OBJCXX], + m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl +]) +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi +AC_SUBST([CTAGS]) +if test -z "$ETAGS"; then + ETAGS=etags +fi +AC_SUBST([ETAGS]) +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi +AC_SUBST([CSCOPE]) + +AC_REQUIRE([AM_SILENT_RULES])dnl +dnl The testsuite driver may need to know about EXEEXT, so add the +dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This +dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. +AC_CONFIG_COMMANDS_PRE(dnl +[m4_provide_if([_AM_COMPILER_EXEEXT], + [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. 
+# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present. This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: . + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) + fi +fi +dnl The trailing newline in this macro's definition is deliberate, for +dnl backward compatibility and to allow trailing 'dnl'-style comments +dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. +]) + +dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not +dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further +dnl mangled by Autoconf and run in a shell conditional statement. +m4_define([_AC_COMPILER_EXEEXT], +m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) + +# When config.status generates a header, we must update the stamp-h file. 
+# This file resides in the same directory as the config header +# that is generated. The stamp files are numbered to have different names. + +# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the +# loop where config.status creates the headers, so we can generate +# our stamp files there. +AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], +[# Compute $1's index in $config_headers. +_am_arg=$1 +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_SH +# ------------------ +# Define $install_sh. +AC_DEFUN([AM_PROG_INSTALL_SH], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi +AC_SUBST([install_sh])]) + +# Copyright (C) 2003-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# Check whether the underlying file-system supports filenames +# with a leading dot. For instance MS-DOS doesn't. +AC_DEFUN([AM_SET_LEADING_DOT], +[rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null +AC_SUBST([am__leading_dot])]) + +# Add --enable-maintainer-mode option to configure. 
-*- Autoconf -*- +# From Jim Meyering + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MAINTAINER_MODE([DEFAULT-MODE]) +# ---------------------------------- +# Control maintainer-specific portions of Makefiles. +# Default is to disable them, unless 'enable' is passed literally. +# For symmetry, 'disable' may be passed as well. Anyway, the user +# can override the default with the --enable/--disable switch. +AC_DEFUN([AM_MAINTAINER_MODE], +[m4_case(m4_default([$1], [disable]), + [enable], [m4_define([am_maintainer_other], [disable])], + [disable], [m4_define([am_maintainer_other], [enable])], + [m4_define([am_maintainer_other], [enable]) + m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])]) +AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles]) + dnl maintainer-mode's default is 'disable' unless 'enable' is passed + AC_ARG_ENABLE([maintainer-mode], + [AS_HELP_STRING([--]am_maintainer_other[-maintainer-mode], + am_maintainer_other[ make rules and dependencies not useful + (and sometimes confusing) to the casual installer])], + [USE_MAINTAINER_MODE=$enableval], + [USE_MAINTAINER_MODE=]m4_if(am_maintainer_other, [enable], [no], [yes])) + AC_MSG_RESULT([$USE_MAINTAINER_MODE]) + AM_CONDITIONAL([MAINTAINER_MODE], [test $USE_MAINTAINER_MODE = yes]) + MAINT=$MAINTAINER_MODE_TRUE + AC_SUBST([MAINT])dnl +] +) + +# Check to see how 'make' treats includes. -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# AM_MAKE_INCLUDE() +# ----------------- +# Check whether make has an 'include' directive that can support all +# the idioms we need for our automatic dependency tracking code. +AC_DEFUN([AM_MAKE_INCLUDE], +[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive]) +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. +echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out]) + AS_CASE([$?:`cat confinc.out 2>/dev/null`], + ['0:this is the am__doit target'], + [AS_CASE([$s], + [BSD], [am__include='.include' am__quote='"'], + [am__include='include' am__quote=''])]) + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +AC_MSG_RESULT([${_am_result}]) +AC_SUBST([am__include])]) +AC_SUBST([am__quote])]) + +# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- + +# Copyright (C) 1997-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_MISSING_PROG(NAME, PROGRAM) +# ------------------------------ +AC_DEFUN([AM_MISSING_PROG], +[AC_REQUIRE([AM_MISSING_HAS_RUN]) +$1=${$1-"${am_missing_run}$2"} +AC_SUBST($1)]) + +# AM_MISSING_HAS_RUN +# ------------------ +# Define MISSING if not defined so far and test if it is modern enough. +# If it is, set am_missing_run to use it, otherwise, to nothing. 
+AC_DEFUN([AM_MISSING_HAS_RUN], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([missing])dnl +if test x"${MISSING+set}" != xset; then + MISSING="\${SHELL} '$am_aux_dir/missing'" +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + AC_MSG_WARN(['missing' script is too old or missing]) +fi +]) + +# Helper functions for option handling. -*- Autoconf -*- + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_MANGLE_OPTION(NAME) +# ----------------------- +AC_DEFUN([_AM_MANGLE_OPTION], +[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) + +# _AM_SET_OPTION(NAME) +# -------------------- +# Set option NAME. Presently that only means defining a flag for this option. +AC_DEFUN([_AM_SET_OPTION], +[m4_define(_AM_MANGLE_OPTION([$1]), [1])]) + +# _AM_SET_OPTIONS(OPTIONS) +# ------------------------ +# OPTIONS is a space-separated list of Automake options. +AC_DEFUN([_AM_SET_OPTIONS], +[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) + +# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) +# ------------------------------------------- +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +AC_DEFUN([_AM_IF_OPTION], +[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_CC_C_O +# --------------- +# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC +# to automatically call this. 
+AC_DEFUN([_AM_PROG_CC_C_O], +[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl +AC_REQUIRE_AUX_FILE([compile])dnl +AC_LANG_PUSH([C])dnl +AC_CACHE_CHECK( + [whether $CC understands -c and -o together], + [am_cv_prog_cc_c_o], + [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) + # Make sure it works both with $CC and with simple cc. + # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i]) +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +AC_LANG_POP([C])]) + +# For backward compatibility. +AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_RUN_LOG(COMMAND) +# ------------------- +# Run COMMAND, save the exit status in ac_status, and log it. +# (This has been adapted from Autoconf's _AC_RUN_LOG macro.) +AC_DEFUN([AM_RUN_LOG], +[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD + ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + (exit $ac_status); }]) + +# Check to make sure that the build environment is sane. -*- Autoconf -*- + +# Copyright (C) 1996-2021 Free Software Foundation, Inc. 
+# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SANITY_CHECK +# --------------- +AC_DEFUN([AM_SANITY_CHECK], +[AC_MSG_CHECKING([whether build environment is sane]) +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[[\\\"\#\$\&\'\`$am_lf]]*) + AC_MSG_ERROR([unsafe absolute working directory name]);; +esac +case $srcdir in + *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) + AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$[*]" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$[*]" != "X $srcdir/configure conftest.file" \ + && test "$[*]" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken + alias in your environment]) + fi + if test "$[2]" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$[2]" = conftest.file + ) +then + # Ok. + : +else + AC_MSG_ERROR([newly created file is older than distributed files! 
+Check your system clock]) +fi +AC_MSG_RESULT([yes]) +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. +am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi +AC_CONFIG_COMMANDS_PRE( + [AC_MSG_CHECKING([that generated files are newer than configure]) + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + AC_MSG_RESULT([done])]) +rm -f conftest.file +]) + +# Copyright (C) 2009-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_SILENT_RULES([DEFAULT]) +# -------------------------- +# Enable less verbose build rules; with the default set to DEFAULT +# ("yes" being less verbose, "no" or empty being verbose). +AC_DEFUN([AM_SILENT_RULES], +[AC_ARG_ENABLE([silent-rules], [dnl +AS_HELP_STRING( + [--enable-silent-rules], + [less verbose build output (undo: "make V=1")]) +AS_HELP_STRING( + [--disable-silent-rules], + [verbose build output (undo: "make V=0")])dnl +]) +case $enable_silent_rules in @%:@ ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; +esac +dnl +dnl A few 'make' implementations (e.g., NonStop OS and NextStep) +dnl do not support nested variable expansions. +dnl See automake bug#9928 and bug#10237. 
+am_make=${MAKE-make} +AC_CACHE_CHECK([whether $am_make supports nested variables], + [am_cv_make_support_nested_variables], + [if AS_ECHO([['TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi]) +if test $am_cv_make_support_nested_variables = yes; then + dnl Using '$V' instead of '$(V)' breaks IRIX make. + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AC_SUBST([AM_V])dnl +AM_SUBST_NOTMAKE([AM_V])dnl +AC_SUBST([AM_DEFAULT_V])dnl +AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl +AC_SUBST([AM_DEFAULT_VERBOSITY])dnl +AM_BACKSLASH='\' +AC_SUBST([AM_BACKSLASH])dnl +_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl +]) + +# Copyright (C) 2001-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# AM_PROG_INSTALL_STRIP +# --------------------- +# One issue with vendor 'install' (even GNU) is that you can't +# specify the program used to strip binaries. This is especially +# annoying in cross-compiling environments, where the build's strip +# is unlikely to handle the host's binaries. +# Fortunately install-sh will honor a STRIPPROG variable, so we +# always use install-sh in "make install-strip", and initialize +# STRIPPROG with the value of the STRIP variable (set by the user). +AC_DEFUN([AM_PROG_INSTALL_STRIP], +[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. 
+dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. +if test "$cross_compiling" != no; then + AC_CHECK_TOOL([STRIP], [strip], :) +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" +AC_SUBST([INSTALL_STRIP_PROGRAM])]) + +# Copyright (C) 2006-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_SUBST_NOTMAKE(VARIABLE) +# --------------------------- +# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. +# This macro is traced by Automake. +AC_DEFUN([_AM_SUBST_NOTMAKE]) + +# AM_SUBST_NOTMAKE(VARIABLE) +# -------------------------- +# Public sister of _AM_SUBST_NOTMAKE. +AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) + +# Check how to create a tarball. -*- Autoconf -*- + +# Copyright (C) 2004-2021 Free Software Foundation, Inc. +# +# This file is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# _AM_PROG_TAR(FORMAT) +# -------------------- +# Check how to create a tarball in format FORMAT. +# FORMAT should be one of 'v7', 'ustar', or 'pax'. +# +# Substitute a variable $(am__tar) that is a command +# writing to stdout a FORMAT-tarball containing the directory +# $tardir. +# tardir=directory && $(am__tar) > result.tar +# +# Substitute a variable $(am__untar) that extract such +# a tarball read from stdin. +# $(am__untar) < result.tar +# +AC_DEFUN([_AM_PROG_TAR], +[# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AC_SUBST([AMTAR], ['$${TAR-tar}']) + +# We'll loop over all known methods to create a tar archive until one works. 
+_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' + +m4_if([$1], [v7], + [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], + + [m4_case([$1], + [ustar], + [# The POSIX 1988 'ustar' format is defined with fixed-size fields. + # There is notably a 21 bits limit for the UID and the GID. In fact, + # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 + # and bug#13588). + am_max_uid=2097151 # 2^21 - 1 + am_max_gid=$am_max_uid + # The $UID and $GID variables are not portable, so we need to resort + # to the POSIX-mandated id(1) utility. Errors in the 'id' calls + # below are definitely unexpected, so allow the users to see them + # (that is, avoid stderr redirection). + am_uid=`id -u || echo unknown` + am_gid=`id -g || echo unknown` + AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) + if test $am_uid -le $am_max_uid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi + AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) + if test $am_gid -le $am_max_gid; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + _am_tools=none + fi], + + [pax], + [], + + [m4_fatal([Unknown tar format])]) + + AC_MSG_CHECKING([how to create a $1 tar archive]) + + # Go ahead even if we have the value already cached. We do so because we + # need to set the values for the 'am__tar' and 'am__untar' variables. + _am_tools=${am_cv_prog_tar_$1-$_am_tools} + + for _am_tool in $_am_tools; do + case $_am_tool in + gnutar) + for _am_tar in tar gnutar gtar; do + AM_RUN_LOG([$_am_tar --version]) && break + done + am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' + am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' + am__untar="$_am_tar -xf -" + ;; + plaintar) + # Must skip GNU tar: if it does not support --format= it doesn't create + # ustar tarball either. 
+ (tar --version) >/dev/null 2>&1 && continue + am__tar='tar chf - "$$tardir"' + am__tar_='tar chf - "$tardir"' + am__untar='tar xf -' + ;; + pax) + am__tar='pax -L -x $1 -w "$$tardir"' + am__tar_='pax -L -x $1 -w "$tardir"' + am__untar='pax -r' + ;; + cpio) + am__tar='find "$$tardir" -print | cpio -o -H $1 -L' + am__tar_='find "$tardir" -print | cpio -o -H $1 -L' + am__untar='cpio -i -H $1 -d' + ;; + none) + am__tar=false + am__tar_=false + am__untar=false + ;; + esac + + # If the value was cached, stop now. We just wanted to have am__tar + # and am__untar set. + test -n "${am_cv_prog_tar_$1}" && break + + # tar/untar a dummy directory, and stop if the command works. + rm -rf conftest.dir + mkdir conftest.dir + echo GrepMe > conftest.dir/file + AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) + rm -rf conftest.dir + if test -s conftest.tar; then + AM_RUN_LOG([$am__untar /dev/null 2>&1 && break + fi + done + rm -rf conftest.dir + + AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) + AC_MSG_RESULT([$am_cv_prog_tar_$1])]) + +AC_SUBST([am__tar]) +AC_SUBST([am__untar]) +]) # _AM_PROG_TAR + +m4_include([m4/ax_pthread.m4]) +m4_include([m4/libtool.m4]) +m4_include([m4/ltoptions.m4]) +m4_include([m4/ltsugar.m4]) +m4_include([m4/ltversion.m4]) +m4_include([m4/lt~obsolete.m4]) +m4_include([m4/pcre2_visibility.m4]) diff --git a/3rd/pcre2/ar-lib b/3rd/pcre2/ar-lib new file mode 100644 index 00000000..c349042c --- /dev/null +++ b/3rd/pcre2/ar-lib @@ -0,0 +1,271 @@ +#! /bin/sh +# Wrapper for Microsoft lib.exe + +me=ar-lib +scriptversion=2019-07-04.01; # UTC + +# Copyright (C) 2010-2021 Free Software Foundation, Inc. +# Written by Peter Rosin . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + + +# func_error message +func_error () +{ + echo "$me: $1" 1>&2 + exit 1 +} + +file_conv= + +# func_file_conv build_file +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. +func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv in + mingw) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin | msys) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_at_file at_file operation archive +# Iterate over all members in AT_FILE performing OPERATION on ARCHIVE +# for each of them. +# When interpreting the content of the @FILE, do NOT use func_file_conv, +# since the user would need to supply preconverted file names to +# binutils ar, at least for MinGW. 
+func_at_file () +{ + operation=$2 + archive=$3 + at_file_contents=`cat "$1"` + eval set x "$at_file_contents" + shift + + for member + do + $AR -NOLOGO $operation:"$member" "$archive" || exit $? + done +} + +case $1 in + '') + func_error "no command. Try '$0 --help' for more information." + ;; + -h | --h*) + cat < /dev/null 2>&1 + if [ $? = 0 ]; then + libtoolize=$l + echo "Found $l" + break + fi + echo "Did not find $l" +done + +if [ "x$libtoolize" = "x" ]; then + echo "Can't find libtoolize on your system" + exit 1 +fi + +set -ex +$libtoolize -c -f +rm -rf autom4te.cache Makefile.in aclocal.m4 +aclocal --force -I m4 +autoconf -f -W all,no-obsolete +autoheader -f -W all + +# Added no-portability to suppress automake 1.12's warning about the use +# of recursive variables. + +automake -a -c -f -W all,no-portability + +rm -rf autom4te.cache +exit 0 + +# end autogen.sh diff --git a/3rd/pcre2/build.zig b/3rd/pcre2/build.zig new file mode 100644 index 00000000..4cb6d99d --- /dev/null +++ b/3rd/pcre2/build.zig @@ -0,0 +1,173 @@ +const std = @import("std"); + +pub const CodeUnitWidth = enum { + @"8", + @"16", + @"32", +}; + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + const linkage = b.option(std.builtin.LinkMode, "linkage", "whether to statically or dynamically link the library") orelse @as(std.builtin.LinkMode, if (target.result.isGnuLibC()) .dynamic else .static); + const codeUnitWidth = b.option(CodeUnitWidth, "code-unit-width", "Sets the code unit width") orelse .@"8"; + + const pcre2_header_dir = b.addWriteFiles(); + const pcre2_header = pcre2_header_dir.addCopyFile(b.path("src/pcre2.h.generic"), "pcre2.h"); + + const config_header = b.addConfigHeader( + .{ + .style = .{ .cmake = b.path("config-cmake.h.in") }, + .include_path = "config.h", + }, + .{ + .HAVE_ASSERT_H = true, + .HAVE_UNISTD_H = (target.result.os.tag != .windows), + .HAVE_WINDOWS_H = (target.result.os.tag == 
.windows), + + .HAVE_MEMMOVE = true, + .HAVE_STRERROR = true, + + .SUPPORT_PCRE2_8 = codeUnitWidth == CodeUnitWidth.@"8", + .SUPPORT_PCRE2_16 = codeUnitWidth == CodeUnitWidth.@"16", + .SUPPORT_PCRE2_32 = codeUnitWidth == CodeUnitWidth.@"32", + .SUPPORT_UNICODE = true, + + .PCRE2_EXPORT = null, + .PCRE2_LINK_SIZE = 2, + .PCRE2_HEAP_LIMIT = 20000000, + .PCRE2_MATCH_LIMIT = 10000000, + .PCRE2_MATCH_LIMIT_DEPTH = "MATCH_LIMIT", + .PCRE2_MAX_VARLOOKBEHIND = 255, + .NEWLINE_DEFAULT = 2, + .PCRE2_PARENS_NEST_LIMIT = 250, + }, + ); + + // pcre2-8/16/32.so + + const lib = std.Build.Step.Compile.create(b, .{ + .name = b.fmt("pcre2-{s}", .{@tagName(codeUnitWidth)}), + .root_module = .{ + .target = target, + .optimize = optimize, + .link_libc = true, + }, + .kind = .lib, + .linkage = linkage, + }); + + lib.defineCMacro("HAVE_CONFIG_H", null); + lib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth)); + if (linkage == .static) { + lib.defineCMacro("PCRE2_STATIC", null); + } + + lib.addConfigHeader(config_header); + lib.addIncludePath(pcre2_header_dir.getDirectory()); + lib.addIncludePath(b.path("src")); + + lib.addCSourceFile(.{ + .file = b.addWriteFiles().addCopyFile(b.path("src/pcre2_chartables.c.dist"), "pcre2_chartables.c"), + }); + + lib.addCSourceFiles(.{ + .files = &.{ + "src/pcre2_auto_possess.c", + "src/pcre2_chkdint.c", + "src/pcre2_compile.c", + "src/pcre2_compile_class.c", + "src/pcre2_config.c", + "src/pcre2_context.c", + "src/pcre2_convert.c", + "src/pcre2_dfa_match.c", + "src/pcre2_error.c", + "src/pcre2_extuni.c", + "src/pcre2_find_bracket.c", + "src/pcre2_jit_compile.c", + "src/pcre2_maketables.c", + "src/pcre2_match.c", + "src/pcre2_match_data.c", + "src/pcre2_newline.c", + "src/pcre2_ord2utf.c", + "src/pcre2_pattern_info.c", + "src/pcre2_script_run.c", + "src/pcre2_serialize.c", + "src/pcre2_string_utils.c", + "src/pcre2_study.c", + "src/pcre2_substitute.c", + "src/pcre2_substring.c", + "src/pcre2_tables.c", + "src/pcre2_ucd.c", + 
"src/pcre2_valid_utf.c", + "src/pcre2_xclass.c", + }, + }); + + lib.installHeader(pcre2_header, "pcre2.h"); + b.installArtifact(lib); + + + // pcre2test + + const pcre2test = b.addExecutable(.{ + .name = "pcre2test", + .target = target, + .optimize = optimize, + }); + + + // pcre2-posix.so + + if (codeUnitWidth == CodeUnitWidth.@"8") { + const posixLib = std.Build.Step.Compile.create(b, .{ + .name = "pcre2-posix", + .root_module = .{ + .target = target, + .optimize = optimize, + .link_libc = true, + }, + .kind = .lib, + .linkage = linkage, + }); + + posixLib.defineCMacro("HAVE_CONFIG_H", null); + posixLib.defineCMacro("PCRE2_CODE_UNIT_WIDTH", @tagName(codeUnitWidth)); + if (linkage == .static) { + posixLib.defineCMacro("PCRE2_STATIC", null); + } + + posixLib.addConfigHeader(config_header); + posixLib.addIncludePath(pcre2_header_dir.getDirectory()); + posixLib.addIncludePath(b.path("src")); + + posixLib.addCSourceFiles(.{ + .files = &.{ + "src/pcre2posix.c", + }, + }); + + posixLib.installHeader(b.path("src/pcre2posix.h"), "pcre2posix.h"); + b.installArtifact(posixLib); + + pcre2test.linkLibrary(posixLib); + } + + + // pcre2test (again) + + pcre2test.defineCMacro("HAVE_CONFIG_H", null); + + pcre2test.addConfigHeader(config_header); + pcre2test.addIncludePath(pcre2_header_dir.getDirectory()); + pcre2test.addIncludePath(b.path("src")); + + pcre2test.addCSourceFile(.{ + .file = b.path("src/pcre2test.c"), + }); + + pcre2test.linkLibC(); + pcre2test.linkLibrary(lib); + + b.installArtifact(pcre2test); +} diff --git a/3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS b/3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS new file mode 100644 index 00000000..53b6b71e --- /dev/null +++ b/3rd/pcre2/cmake/COPYING-CMAKE-SCRIPTS @@ -0,0 +1,22 @@ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. 
Redistributions of source code must retain the copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/3rd/pcre2/cmake/FindEditline.cmake b/3rd/pcre2/cmake/FindEditline.cmake new file mode 100644 index 00000000..38d075fd --- /dev/null +++ b/3rd/pcre2/cmake/FindEditline.cmake @@ -0,0 +1,13 @@ +# Modified from FindReadline.cmake (PH Feb 2012) + +if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) + set(EDITLINE_FOUND TRUE) +else() + find_path(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES editline edit/readline) + + find_library(EDITLINE_LIBRARY NAMES edit) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) + + mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) +endif() diff --git a/3rd/pcre2/cmake/FindReadline.cmake b/3rd/pcre2/cmake/FindReadline.cmake new file mode 100644 index 00000000..6b650464 --- /dev/null +++ b/3rd/pcre2/cmake/FindReadline.cmake @@ -0,0 +1,27 @@ +# from http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/FindReadline.cmake +# http://websvn.kde.org/trunk/KDE/kdeedu/cmake/modules/COPYING-CMAKE-SCRIPTS +# --> BSD licensed +# +# GNU Readline library finder +if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY AND NCURSES_LIBRARY) + set(READLINE_FOUND TRUE) +else() + find_path(READLINE_INCLUDE_DIR readline/readline.h /usr/include/readline) + + # 2008-04-22 The next clause used to read like this: + # + # FIND_LIBRARY(READLINE_LIBRARY NAMES readline) + # FIND_LIBRARY(NCURSES_LIBRARY NAMES ncurses ) + # include(FindPackageHandleStandardArgs) + # FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG NCURSES_LIBRARY READLINE_INCLUDE_DIR READLINE_LIBRARY ) + # + # I was advised to modify it such that it will find an ncurses library if + # required, but not if one was explicitly given, that is, it allows the + # default to be overridden. 
PH + + find_library(READLINE_LIBRARY NAMES readline) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Readline DEFAULT_MSG READLINE_INCLUDE_DIR READLINE_LIBRARY) + + mark_as_advanced(READLINE_INCLUDE_DIR READLINE_LIBRARY) +endif() diff --git a/3rd/pcre2/cmake/pcre2-config-version.cmake.in b/3rd/pcre2/cmake/pcre2-config-version.cmake.in new file mode 100644 index 00000000..db006063 --- /dev/null +++ b/3rd/pcre2/cmake/pcre2-config-version.cmake.in @@ -0,0 +1,14 @@ +set(PACKAGE_VERSION_MAJOR @PCRE2_MAJOR@) +set(PACKAGE_VERSION_MINOR @PCRE2_MINOR@) +set(PACKAGE_VERSION_PATCH 0) +set(PACKAGE_VERSION @PCRE2_MAJOR@.@PCRE2_MINOR@.0) + +# Check whether the requested PACKAGE_FIND_VERSION is compatible +if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION OR PACKAGE_VERSION_MAJOR GREATER PACKAGE_FIND_VERSION_MAJOR) + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + set(PACKAGE_VERSION_COMPATIBLE TRUE) + if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION) + set(PACKAGE_VERSION_EXACT TRUE) + endif() +endif() diff --git a/3rd/pcre2/cmake/pcre2-config.cmake.in b/3rd/pcre2/cmake/pcre2-config.cmake.in new file mode 100644 index 00000000..082dc198 --- /dev/null +++ b/3rd/pcre2/cmake/pcre2-config.cmake.in @@ -0,0 +1,168 @@ +# pcre2-config.cmake +# ---------------- +# +# Finds the PCRE2 library, specify the starting search path in PCRE2_ROOT. +# +# Static vs. shared +# ----------------- +# To make use of the static library instead of the shared one, one needs +# to set the variable PCRE2_USE_STATIC_LIBS to ON before calling find_package. +# Example: +# set(PCRE2_USE_STATIC_LIBS ON) +# find_package(PCRE2 CONFIG COMPONENTS 8BIT) +# +# This will define the following variables: +# +# PCRE2_FOUND - True if the system has the PCRE2 library. +# PCRE2_VERSION - The version of the PCRE2 library which was found. +# +# and the following imported targets: +# +# PCRE2::8BIT - The 8 bit PCRE2 library. +# PCRE2::16BIT - The 16 bit PCRE2 library. 
+# PCRE2::32BIT - The 32 bit PCRE2 library. +# PCRE2::POSIX - The POSIX PCRE2 library. + +set(PCRE2_NON_STANDARD_LIB_PREFIX @NON_STANDARD_LIB_PREFIX@) +set(PCRE2_NON_STANDARD_LIB_SUFFIX @NON_STANDARD_LIB_SUFFIX@) +set(PCRE2_8BIT_NAME pcre2-8) +set(PCRE2_16BIT_NAME pcre2-16) +set(PCRE2_32BIT_NAME pcre2-32) +set(PCRE2_POSIX_NAME pcre2-posix) +find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h DOC "PCRE2 include directory") +if(PCRE2_USE_STATIC_LIBS) + if(MSVC) + set(PCRE2_8BIT_NAME pcre2-8-static) + set(PCRE2_16BIT_NAME pcre2-16-static) + set(PCRE2_32BIT_NAME pcre2-32-static) + set(PCRE2_POSIX_NAME pcre2-posix-static) + endif() + + set(PCRE2_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX}) + set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) +else() + set(PCRE2_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX}) + if(MINGW AND PCRE2_NON_STANDARD_LIB_PREFIX) + set(PCRE2_PREFIX "") + endif() + + set(PCRE2_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(MINGW AND PCRE2_NON_STANDARD_LIB_SUFFIX) + set(PCRE2_SUFFIX "-0.dll") + elseif(MSVC) + set(PCRE2_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() +find_library( + PCRE2_8BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} + DOC "8 bit PCRE2 library" +) +find_library( + PCRE2_16BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} + DOC "16 bit PCRE2 library" +) +find_library( + PCRE2_32BIT_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} + DOC "32 bit PCRE2 library" +) +find_library( + PCRE2_POSIX_LIBRARY + NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} + DOC "8 bit POSIX PCRE2 library" +) +unset(PCRE2_NON_STANDARD_LIB_PREFIX) +unset(PCRE2_NON_STANDARD_LIB_SUFFIX) +unset(PCRE2_8BIT_NAME) +unset(PCRE2_16BIT_NAME) +unset(PCRE2_32BIT_NAME) +unset(PCRE2_POSIX_NAME) + +# Set version 
+if(PCRE2_INCLUDE_DIR) + set(PCRE2_VERSION "@PCRE2_MAJOR@.@PCRE2_MINOR@.0") +endif() + +# Which components have been found. +if(PCRE2_8BIT_LIBRARY) + set(PCRE2_8BIT_FOUND TRUE) +endif() +if(PCRE2_16BIT_LIBRARY) + set(PCRE2_16BIT_FOUND TRUE) +endif() +if(PCRE2_32BIT_LIBRARY) + set(PCRE2_32BIT_FOUND TRUE) +endif() +if(PCRE2_POSIX_LIBRARY) + set(PCRE2_POSIX_FOUND TRUE) +endif() + +# Check if at least one component has been specified. +list(LENGTH PCRE2_FIND_COMPONENTS PCRE2_NCOMPONENTS) +if(PCRE2_NCOMPONENTS LESS 1) + message(FATAL_ERROR "No components have been specified. This is not allowed. Please, specify at least one component.") +endif() +unset(PCRE2_NCOMPONENTS) + +# When POSIX component has been specified make sure that also 8BIT component is specified. +set(PCRE2_8BIT_COMPONENT FALSE) +set(PCRE2_POSIX_COMPONENT FALSE) +foreach(component ${PCRE2_FIND_COMPONENTS}) + if(component STREQUAL "8BIT") + set(PCRE2_8BIT_COMPONENT TRUE) + elseif(component STREQUAL "POSIX") + set(PCRE2_POSIX_COMPONENT TRUE) + endif() +endforeach() + +if(PCRE2_POSIX_COMPONENT AND NOT PCRE2_8BIT_COMPONENT) + message( + FATAL_ERROR + "The component POSIX is specified while the 8BIT one is not. This is not allowed. Please, also specify the 8BIT component." 
+ ) +endif() +unset(PCRE2_8BIT_COMPONENT) +unset(PCRE2_POSIX_COMPONENT) + +include(FindPackageHandleStandardArgs) +set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") +find_package_handle_standard_args( + PCRE2 + FOUND_VAR PCRE2_FOUND + REQUIRED_VARS PCRE2_INCLUDE_DIR + HANDLE_COMPONENTS + VERSION_VAR PCRE2_VERSION + CONFIG_MODE +) + +set(PCRE2_LIBRARIES) +if(PCRE2_FOUND) + foreach(component ${PCRE2_FIND_COMPONENTS}) + if(PCRE2_USE_STATIC_LIBS) + add_library(PCRE2::${component} STATIC IMPORTED) + target_compile_definitions(PCRE2::${component} INTERFACE PCRE2_STATIC) + else() + add_library(PCRE2::${component} SHARED IMPORTED) + endif() + set_target_properties( + PCRE2::${component} + PROPERTIES + IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}" + IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}" + ) + if(component STREQUAL "POSIX") + set_target_properties( + PCRE2::${component} + PROPERTIES INTERFACE_LINK_LIBRARIES "PCRE2::8BIT" LINK_LIBRARIES "PCRE2::8BIT" + ) + endif() + + set(PCRE2_LIBRARIES ${PCRE2_LIBRARIES} ${PCRE2_${component}_LIBRARY}) + mark_as_advanced(PCRE2_${component}_LIBRARY) + endforeach() +endif() + +mark_as_advanced(PCRE2_INCLUDE_DIR) diff --git a/3rd/pcre2/compile b/3rd/pcre2/compile new file mode 100644 index 00000000..df363c8f --- /dev/null +++ b/3rd/pcre2/compile @@ -0,0 +1,348 @@ +#! /bin/sh +# Wrapper for compilers which do not understand '-c -o'. + +scriptversion=2018-03-07.03; # UTC + +# Copyright (C) 1999-2021 Free Software Foundation, Inc. +# Written by Tom Tromey . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +nl=' +' + +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent tools from complaining about whitespace usage. +IFS=" "" $nl" + +file_conv= + +# func_file_conv build_file lazy +# Convert a $build file to $host form and store it in $file +# Currently only supports Windows hosts. If the determined conversion +# type is listed in (the comma separated) LAZY, no conversion will +# take place. 
+func_file_conv () +{ + file=$1 + case $file in + / | /[!/]*) # absolute file, and not a UNC file + if test -z "$file_conv"; then + # lazily determine how to convert abs files + case `uname -s` in + MINGW*) + file_conv=mingw + ;; + CYGWIN* | MSYS*) + file_conv=cygwin + ;; + *) + file_conv=wine + ;; + esac + fi + case $file_conv/,$2, in + *,$file_conv,*) + ;; + mingw/*) + file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` + ;; + cygwin/* | msys/*) + file=`cygpath -m "$file" || echo "$file"` + ;; + wine/*) + file=`winepath -w "$file" || echo "$file"` + ;; + esac + ;; + esac +} + +# func_cl_dashL linkdir +# Make cl look for libraries in LINKDIR +func_cl_dashL () +{ + func_file_conv "$1" + if test -z "$lib_path"; then + lib_path=$file + else + lib_path="$lib_path;$file" + fi + linker_opts="$linker_opts -LIBPATH:$file" +} + +# func_cl_dashl library +# Do a library search-path lookup for cl +func_cl_dashl () +{ + lib=$1 + found=no + save_IFS=$IFS + IFS=';' + for dir in $lib_path $LIB + do + IFS=$save_IFS + if $shared && test -f "$dir/$lib.dll.lib"; then + found=yes + lib=$dir/$lib.dll.lib + break + fi + if test -f "$dir/$lib.lib"; then + found=yes + lib=$dir/$lib.lib + break + fi + if test -f "$dir/lib$lib.a"; then + found=yes + lib=$dir/lib$lib.a + break + fi + done + IFS=$save_IFS + + if test "$found" != yes; then + lib=$lib.lib + fi +} + +# func_cl_wrapper cl arg... +# Adjust compile command to suit cl +func_cl_wrapper () +{ + # Assume a capable shell + lib_path= + shared=: + linker_opts= + for arg + do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. 
+ eat=1 + case $2 in + *.o | *.[oO][bB][jJ]) + func_file_conv "$2" + set x "$@" -Fo"$file" + shift + ;; + *) + func_file_conv "$2" + set x "$@" -Fe"$file" + shift + ;; + esac + ;; + -I) + eat=1 + func_file_conv "$2" mingw + set x "$@" -I"$file" + shift + ;; + -I*) + func_file_conv "${1#-I}" mingw + set x "$@" -I"$file" + shift + ;; + -l) + eat=1 + func_cl_dashl "$2" + set x "$@" "$lib" + shift + ;; + -l*) + func_cl_dashl "${1#-l}" + set x "$@" "$lib" + shift + ;; + -L) + eat=1 + func_cl_dashL "$2" + ;; + -L*) + func_cl_dashL "${1#-L}" + ;; + -static) + shared=false + ;; + -Wl,*) + arg=${1#-Wl,} + save_ifs="$IFS"; IFS=',' + for flag in $arg; do + IFS="$save_ifs" + linker_opts="$linker_opts $flag" + done + IFS="$save_ifs" + ;; + -Xlinker) + eat=1 + linker_opts="$linker_opts $2" + ;; + -*) + set x "$@" "$1" + shift + ;; + *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) + func_file_conv "$1" + set x "$@" -Tp"$file" + shift + ;; + *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) + func_file_conv "$1" mingw + set x "$@" "$file" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift + done + if test -n "$linker_opts"; then + linker_opts="-link$linker_opts" + fi + exec "$@" $linker_opts + exit 1 +} + +eat= + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: compile [--help] [--version] PROGRAM [ARGS] + +Wrapper for compilers which do not understand '-c -o'. +Remove '-o dest.o' from ARGS, run PROGRAM with the remaining +arguments, and rename the output as expected. + +If you are trying to build a whole package this is not the +right script to run: please start by reading the file 'INSTALL'. + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "compile $scriptversion" + exit $? + ;; + cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \ + icl | *[/\\]icl | icl.exe | *[/\\]icl.exe ) + func_cl_wrapper "$@" # Doesn't return... 
+ ;; +esac + +ofile= +cfile= + +for arg +do + if test -n "$eat"; then + eat= + else + case $1 in + -o) + # configure might choose to run compile as 'compile cc -o foo foo.c'. + # So we strip '-o arg' only if arg is an object. + eat=1 + case $2 in + *.o | *.obj) + ofile=$2 + ;; + *) + set x "$@" -o "$2" + shift + ;; + esac + ;; + *.c) + cfile=$1 + set x "$@" "$1" + shift + ;; + *) + set x "$@" "$1" + shift + ;; + esac + fi + shift +done + +if test -z "$ofile" || test -z "$cfile"; then + # If no '-o' option was seen then we might have been invoked from a + # pattern rule where we don't need one. That is ok -- this is a + # normal compilation that the losing compiler can handle. If no + # '.c' file was seen then we are probably linking. That is also + # ok. + exec "$@" +fi + +# Name of file we expect compiler to create. +cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` + +# Create the lock directory. +# Note: use '[/\\:.-]' here to ensure that we don't use the same name +# that we are using for the .o file. Also, base the name on the expected +# object file name, since that is what matters with a parallel build. +lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d +while true; do + if mkdir "$lockdir" >/dev/null 2>&1; then + break + fi + sleep 1 +done +# FIXME: race condition here if user kills between mkdir and trap. +trap "rmdir '$lockdir'; exit 1" 1 2 15 + +# Run the compile. +"$@" +ret=$? 
+ +if test -f "$cofile"; then + test "$cofile" = "$ofile" || mv "$cofile" "$ofile" +elif test -f "${cofile}bj"; then + test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" +fi + +rmdir "$lockdir" +exit $ret + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/3rd/pcre2/config-cmake.h.in b/3rd/pcre2/config-cmake.h.in new file mode 100644 index 00000000..0eff0e0f --- /dev/null +++ b/3rd/pcre2/config-cmake.h.in @@ -0,0 +1,58 @@ +/* config.h for CMake builds */ + +#cmakedefine HAVE_ASSERT_H 1 +#cmakedefine HAVE_BUILTIN_ASSUME 1 +#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1 +#cmakedefine HAVE_BUILTIN_UNREACHABLE 1 +#cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1 +#cmakedefine HAVE_DIRENT_H 1 +#cmakedefine HAVE_SYS_STAT_H 1 +#cmakedefine HAVE_SYS_TYPES_H 1 +#cmakedefine HAVE_UNISTD_H 1 +#cmakedefine HAVE_WINDOWS_H 1 + +#cmakedefine HAVE_BCOPY 1 +#cmakedefine HAVE_MEMFD_CREATE 1 +#cmakedefine HAVE_MEMMOVE 1 +#cmakedefine HAVE_SECURE_GETENV 1 +#cmakedefine HAVE_STRERROR 1 + +#cmakedefine SUPPORT_PCRE2_8 1 +#cmakedefine SUPPORT_PCRE2_16 1 +#cmakedefine SUPPORT_PCRE2_32 1 +#cmakedefine DISABLE_PERCENT_ZT 1 + +#cmakedefine SUPPORT_LIBBZ2 1 +#cmakedefine SUPPORT_LIBEDIT 1 +#cmakedefine SUPPORT_LIBREADLINE 1 +#cmakedefine SUPPORT_LIBZ 1 + +#cmakedefine SUPPORT_JIT 1 +#cmakedefine SLJIT_PROT_EXECUTABLE_ALLOCATOR 1 +#cmakedefine SUPPORT_PCRE2GREP_JIT 1 +#cmakedefine SUPPORT_PCRE2GREP_CALLOUT 1 +#cmakedefine SUPPORT_PCRE2GREP_CALLOUT_FORK 1 +#cmakedefine SUPPORT_UNICODE 1 +#cmakedefine SUPPORT_VALGRIND 1 + +#cmakedefine BSR_ANYCRLF 1 +#cmakedefine EBCDIC 1 +#cmakedefine EBCDIC_NL25 1 +#cmakedefine HEAP_MATCH_RECURSE 1 +#cmakedefine NEVER_BACKSLASH_C 1 + +#define PCRE2_EXPORT @PCRE2_EXPORT@ +#define LINK_SIZE @PCRE2_LINK_SIZE@ +#define HEAP_LIMIT 
@PCRE2_HEAP_LIMIT@ +#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ +#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ +#define MAX_VARLOOKBEHIND @PCRE2_MAX_VARLOOKBEHIND@ +#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@ +#define PARENS_NEST_LIMIT @PCRE2_PARENS_NEST_LIMIT@ +#define PCRE2GREP_BUFSIZE @PCRE2GREP_BUFSIZE@ +#define PCRE2GREP_MAX_BUFSIZE @PCRE2GREP_MAX_BUFSIZE@ + +#define MAX_NAME_SIZE 128 +#define MAX_NAME_COUNT 10000 + +/* end config.h for CMake builds */ diff --git a/3rd/pcre2/config.guess b/3rd/pcre2/config.guess new file mode 100644 index 00000000..7f76b622 --- /dev/null +++ b/3rd/pcre2/config.guess @@ -0,0 +1,1754 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright 1992-2022 Free Software Foundation, Inc. + +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2022-01-09' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/>. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). +# +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. 
+# +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess +# +# Please send patches to <config-patches@gnu.org>. + + +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. + + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright 1992-2022 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line: the informational options print their text and exit; any argument still left after option processing is rejected below. +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +# Just in case it came from the environment. +GUESS= + +# CC_FOR_BUILD -- compiler used by this script. 
Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. The trap below removes $tmp, when set, on exit and on signals 1, 2, 13 and 15. + +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 + +set_cc_for_build() { + # Run only once: a non-empty $tmp means an earlier call already created the temporary directory and did the compiler selection below. + test "$tmp" && return 0 + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039,SC3028 + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD=$driver + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. 
+# (ghazi@noc.rutgers.edu 1994-08-24) +if test -f /.attbin/uname ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +case $UNAME_SYSTEM in +Linux|GNU|GNU/*) + LIBC=unknown + + set_cc_for_build + cat <<-EOF > "$dummy.c" + #include <features.h> + #if defined(__UCLIBC__) + LIBC=uclibc + #elif defined(__dietlibc__) + LIBC=dietlibc + #elif defined(__GLIBC__) + LIBC=gnu + #else + #include <stdarg.h> + /* First heuristic to detect musl libc. */ + #ifdef __DEFINED_va_list + LIBC=musl + #endif + #endif + EOF + cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$cc_set_libc" + + # Second heuristic to detect musl libc: ldd exists and its --version output starts with "musl". + if [ "$LIBC" = unknown ] && + command -v ldd >/dev/null && + ldd --version 2>&1 | grep -q ^musl; then + LIBC=musl + fi + + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + if [ "$LIBC" = unknown ]; then + LIBC=gnu + fi + ;; +esac + +# Note: order is significant - the case branches are not exclusive. + +case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". 
+ UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + echo unknown)` + case $UNAME_MACHINE_ARCH in + aarch64eb) machine=aarch64_be-unknown ;; + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; + *) machine=$UNAME_MACHINE_ARCH-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently (or will in the future) and ABI. + case $UNAME_MACHINE_ARCH in + earm*) + os=netbsdelf + ;; + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # Determine ABI tags. + case $UNAME_MACHINE_ARCH in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case $UNAME_VERSION in + Debian*) + release='-gnu' + ;; + *) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
+ GUESS=$machine-${os}${release}${abi-} + ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE + ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE + ;; + *:SecBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE + ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE + ;; + *:MidnightBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE + ;; + *:ekkoBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE + ;; + *:SolidBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE + ;; + *:OS108:*:*) + GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE + ;; + macppc:MirBSD:*:*) + GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE + ;; + *:MirBSD:*:*) + GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE + ;; + *:Sortix:*:*) + GUESS=$UNAME_MACHINE-unknown-sortix + ;; + *:Twizzler:*:*) + GUESS=$UNAME_MACHINE-unknown-twizzler + ;; + *:Redox:*:*) + GUESS=$UNAME_MACHINE-unknown-redox + ;; + mips:OSF1:*.*) + GUESS=mips-dec-osf1 + ;; + alpha:OSF1:*:*) + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + trap '' 0 + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case $ALPHA_CPU_TYPE in + "EV4 (21064)") + UNAME_MACHINE=alpha ;; + "EV4.5 (21064)") + UNAME_MACHINE=alpha ;; + "LCA4 (21066/21068)") + UNAME_MACHINE=alpha ;; + "EV5 (21164)") + UNAME_MACHINE=alphaev5 ;; + "EV5.6 (21164A)") + UNAME_MACHINE=alphaev56 ;; + "EV5.6 (21164PC)") + UNAME_MACHINE=alphapca56 ;; + "EV5.7 (21164PC)") + UNAME_MACHINE=alphapca57 ;; + "EV6 (21264)") + UNAME_MACHINE=alphaev6 ;; + "EV6.7 (21264A)") + UNAME_MACHINE=alphaev67 ;; + "EV6.8CB (21264C)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8AL (21264B)") + UNAME_MACHINE=alphaev68 ;; + "EV6.8CX (21264D)") + UNAME_MACHINE=alphaev68 ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE=alphaev69 ;; + "EV7 (21364)") + UNAME_MACHINE=alphaev7 ;; + "EV7.9 (21364A)") + UNAME_MACHINE=alphaev79 ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + GUESS=$UNAME_MACHINE-dec-osf$OSF_REL + ;; + Amiga*:UNIX_System_V:4.0:*) + GUESS=m68k-unknown-sysv4 + ;; + *:[Aa]miga[Oo][Ss]:*:*) + GUESS=$UNAME_MACHINE-unknown-amigaos + ;; + *:[Mm]orph[Oo][Ss]:*:*) + GUESS=$UNAME_MACHINE-unknown-morphos + ;; + *:OS/390:*:*) + GUESS=i370-ibm-openedition + ;; + *:z/VM:*:*) + GUESS=s390-ibm-zvmoe + ;; + *:OS400:*:*) + GUESS=powerpc-ibm-os400 + ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + GUESS=arm-acorn-riscix$UNAME_RELEASE + ;; + arm*:riscos:*:*|arm*:RISCOS:*:*) + GUESS=arm-unknown-riscos + ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + GUESS=hppa1.1-hitachi-hiuxmpp + ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
+ case `(/bin/universe) 2>/dev/null` in + att) GUESS=pyramid-pyramid-sysv3 ;; + *) GUESS=pyramid-pyramid-bsd ;; + esac + ;; + NILE*:*:*:dcosx) + GUESS=pyramid-pyramid-svr4 + ;; + DRS?6000:unix:4.0:6*) + GUESS=sparc-icl-nx6 + ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) GUESS=sparc-icl-nx7 ;; + esac + ;; + s390x:SunOS:*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL + ;; + sun4H:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-hal-solaris2$SUN_REL + ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris2$SUN_REL + ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + GUESS=i386-pc-auroraux$UNAME_RELEASE + ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + set_cc_for_build + SUN_ARCH=i386 + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH=x86_64 + fi + fi + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$SUN_ARCH-pc-solaris2$SUN_REL + ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris3$SUN_REL + ;; + sun4*:SunOS:*:*) + case `/usr/bin/arch -k` in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. 
+ SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` + GUESS=sparc-sun-sunos$SUN_REL + ;; + sun3*:SunOS:*:*) + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case `/bin/arch` in + sun3) + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; + sun4) + GUESS=sparc-sun-sunos$UNAME_RELEASE + ;; + esac + ;; + aushp:SunOS:*:*) + GUESS=sparc-auspex-sunos$UNAME_RELEASE + ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + GUESS=m68k-milan-mint$UNAME_RELEASE + ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + GUESS=m68k-hades-mint$UNAME_RELEASE + ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + GUESS=m68k-unknown-mint$UNAME_RELEASE + ;; + m68k:machten:*:*) + GUESS=m68k-apple-machten$UNAME_RELEASE + ;; + powerpc:machten:*:*) + GUESS=powerpc-apple-machten$UNAME_RELEASE + ;; + RISC*:Mach:*:*) + GUESS=mips-dec-mach_bsd4.3 + ;; + RISC*:ULTRIX:*:*) + GUESS=mips-dec-ultrix$UNAME_RELEASE + ;; + VAX*:ULTRIX*:*:*) + GUESS=vax-dec-ultrix$UNAME_RELEASE + ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + GUESS=clipper-intergraph-clix$UNAME_RELEASE + ;; + mips:*:*:UMIPS | 
mips:*:*:RISCos) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" +#ifdef __cplusplus +#include /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`"$dummy" "$dummyarg"` && + { echo "$SYSTEM_NAME"; exit; } + GUESS=mips-mips-riscos$UNAME_RELEASE + ;; + Motorola:PowerMAX_OS:*:*) + GUESS=powerpc-motorola-powermax + ;; + Motorola:*:4.3:PL8-*) + GUESS=powerpc-harris-powermax + ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + GUESS=powerpc-harris-powermax + ;; + Night_Hawk:Power_UNIX:*:*) + GUESS=powerpc-harris-powerunix + ;; + m88k:CX/UX:7*:*) + GUESS=m88k-harris-cxux7 + ;; + m88k:*:4*:R4*) + GUESS=m88k-motorola-sysv4 + ;; + m88k:*:3*:R3*) + GUESS=m88k-motorola-sysv3 + ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 + then + if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ + test "$TARGET_BINARY_INTERFACE"x = x + then + GUESS=m88k-dg-dgux$UNAME_RELEASE + else + GUESS=m88k-dg-dguxbcs$UNAME_RELEASE + fi + else + GUESS=i586-dg-dgux$UNAME_RELEASE + fi + ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + GUESS=m88k-dolphin-sysv3 + ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + GUESS=m88k-motorola-sysv3 + ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + GUESS=m88k-tektronix-sysv3 + ;; + Tek43[0-9][0-9]:UTek:*:*) # 
Tektronix 4300 system running UTek (BSD) + GUESS=m68k-tektronix-bsd + ;; + *:IRIX*:*:*) + IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` + GUESS=mips-sgi-irix$IRIX_REL + ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. + GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id + ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + GUESS=i386-ibm-aix + ;; + ia64:AIX:*:*) + if test -x /usr/bin/oslevel ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE + fi + GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV + ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` + then + GUESS=$SYSTEM_NAME + else + GUESS=rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + GUESS=rs6000-ibm-aix3.2.4 + else + GUESS=rs6000-ibm-aix3.2 + fi + ;; + *:AIX:*:[4567]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if test -x /usr/bin/lslpp ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + else + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE + fi + GUESS=$IBM_ARCH-ibm-aix$IBM_REV + ;; + *:AIX:*:*) + GUESS=rs6000-ibm-aix + ;; + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) + GUESS=romp-ibm-bsd4.4 + ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to + ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + GUESS=rs6000-bull-bosx + ;; + DPX/2?00:B.O.S.:*:*) + GUESS=m68k-bull-sysv3 + ;; + 9000/[34]??:4.3bsd:1.*:*) + GUESS=m68k-hp-bsd + ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + 
GUESS=m68k-hp-bsd4.4 + ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + case $UNAME_MACHINE in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if test -x /usr/bin/getconf; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case $sc_cpu_version in + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case $sc_kernel_bits in + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + esac ;; + esac + fi + if test "$HP_ARCH" = ""; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + + #define _HPUX_SOURCE + #include + #include + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if test "$HP_ARCH" = hppa2.0w + then + set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ + then + HP_ARCH=hppa2.0w + else + HP_ARCH=hppa64 + fi + fi + GUESS=$HP_ARCH-hp-hpux$HPUX_REV + ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + GUESS=ia64-hp-hpux$HPUX_REV + ;; + 3050*:HI-UX:*:*) + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" + #include + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + GUESS=unknown-hitachi-hiuxwe2 + ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) + GUESS=hppa1.1-hp-bsd + ;; + 9000/8??:4.3bsd:*:*) + GUESS=hppa1.0-hp-bsd + ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + GUESS=hppa1.0-hp-mpeix + ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) + GUESS=hppa1.1-hp-osf + ;; + hp8??:OSF1:*:*) + GUESS=hppa1.0-hp-osf + ;; + i*86:OSF1:*:*) + if test -x /usr/sbin/sysversion ; then + GUESS=$UNAME_MACHINE-unknown-osf1mk + else + GUESS=$UNAME_MACHINE-unknown-osf1 + fi + ;; + parisc*:Lites*:*:*) + GUESS=hppa1.1-hp-lites + ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + GUESS=c1-convex-bsd + ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc 
+ then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + GUESS=c34-convex-bsd + ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + GUESS=c38-convex-bsd + ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + GUESS=c4-convex-bsd + ;; + CRAY*Y-MP:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=ymp-cray-unicos$CRAY_REL + ;; + CRAY*[A-Z]90:*:*:*) + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=t90-cray-unicos$CRAY_REL + ;; + CRAY*T3E:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=alphaev5-cray-unicosmk$CRAY_REL + ;; + CRAY*SV1:*:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=sv1-cray-unicos$CRAY_REL + ;; + *:UNICOS/mp:*:*) + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=craynv-cray-unicosmp$CRAY_REL + ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE + ;; + sparc*:BSD/OS:*:*) + GUESS=sparc-unknown-bsdi$UNAME_RELEASE + ;; + *:BSD/OS:*:*) + 
GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE + ;; + arm:FreeBSD:*:*) + UNAME_PROCESSOR=`uname -p` + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi + else + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf + fi + ;; + *:FreeBSD:*:*) + UNAME_PROCESSOR=`/usr/bin/uname -p` + case $UNAME_PROCESSOR in + amd64) + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; + esac + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL + ;; + i*:CYGWIN*:*) + GUESS=$UNAME_MACHINE-pc-cygwin + ;; + *:MINGW64*:*) + GUESS=$UNAME_MACHINE-pc-mingw64 + ;; + *:MINGW*:*) + GUESS=$UNAME_MACHINE-pc-mingw32 + ;; + *:MSYS*:*) + GUESS=$UNAME_MACHINE-pc-msys + ;; + i*:PW*:*) + GUESS=$UNAME_MACHINE-pc-pw32 + ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; + *:Interix*:*) + case $UNAME_MACHINE in + x86) + GUESS=i586-pc-interix$UNAME_RELEASE + ;; + authenticamd | genuineintel | EM64T) + GUESS=x86_64-unknown-interix$UNAME_RELEASE + ;; + IA64) + GUESS=ia64-unknown-interix$UNAME_RELEASE + ;; + esac ;; + i*:UWIN*:*) + GUESS=$UNAME_MACHINE-pc-uwin + ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + GUESS=x86_64-pc-cygwin + ;; + prep*:SunOS:5.*:*) + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=powerpcle-unknown-solaris2$SUN_REL + ;; + *:GNU:*:*) + # the GNU system + GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` + GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL + ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + 
GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC + ;; + *:Minix:*:*) + GUESS=$UNAME_MACHINE-unknown-minix + ;; + aarch64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + aarch64_be:Linux:*:*) + UNAME_MACHINE=aarch64_be + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arm*:Linux:*:*) + set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + else + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi + else + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf + fi + fi + ;; + avr32*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + cris:Linux:*:*) + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; + crisv32:Linux:*:*) + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; + e2k:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + frv:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + hexagon:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + i*86:Linux:*:*) + GUESS=$UNAME_MACHINE-pc-linux-$LIBC + ;; + ia64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + k1om:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; 
+ m32r*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + m68*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + mips:Linux:*:* | mips64:Linux:*:*) + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > "$dummy.c" + #undef CPU + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + MIPS_ENDIAN=el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + MIPS_ENDIAN= + #else + MIPS_ENDIAN= + #endif + #endif +EOF + cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` + eval "$cc_set_vars" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } + ;; + mips64el:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + openrisc*:Linux:*:*) + GUESS=or1k-unknown-linux-$LIBC + ;; + or32:Linux:*:* | or1k*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + padre:Linux:*:*) + GUESS=sparc-unknown-linux-$LIBC + ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + GUESS=hppa64-unknown-linux-$LIBC + ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; + PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; + *) GUESS=hppa-unknown-linux-$LIBC ;; + esac + ;; + ppc64:Linux:*:*) + GUESS=powerpc64-unknown-linux-$LIBC + ;; + 
ppc:Linux:*:*) + GUESS=powerpc-unknown-linux-$LIBC + ;; + ppc64le:Linux:*:*) + GUESS=powerpc64le-unknown-linux-$LIBC + ;; + ppcle:Linux:*:*) + GUESS=powerpcle-unknown-linux-$LIBC + ;; + riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + s390:Linux:*:* | s390x:Linux:*:*) + GUESS=$UNAME_MACHINE-ibm-linux-$LIBC + ;; + sh64*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + sh*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + tile*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + vax:Linux:*:*) + GUESS=$UNAME_MACHINE-dec-linux-$LIBC + ;; + x86_64:Linux:*:*) + set_cc_for_build + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_X32 >/dev/null + then + LIBCABI=${LIBC}x32 + fi + fi + GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI + ;; + xtensa*:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + GUESS=i386-sequent-sysv4 + ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION + ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+ GUESS=$UNAME_MACHINE-pc-os2-emx + ;; + i*86:XTS-300:*:STOP) + GUESS=$UNAME_MACHINE-unknown-stop + ;; + i*86:atheos:*:*) + GUESS=$UNAME_MACHINE-unknown-atheos + ;; + i*86:syllable:*:*) + GUESS=$UNAME_MACHINE-pc-syllable + ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) + GUESS=i386-unknown-lynxos$UNAME_RELEASE + ;; + i*86:*DOS:*:*) + GUESS=$UNAME_MACHINE-pc-msdosdjgpp + ;; + i*86:*:4.*:*) + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL + else + GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL + fi + ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL + else + GUESS=$UNAME_MACHINE-pc-sysv32 + fi + ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i586. + # Note: whatever this is, it MUST be the same as what config.sub + # prints for the "djgpp" host, or else GDB configure will decide that + # this is a cross-build. 
+ GUESS=i586-pc-msdosdjgpp + ;; + Intel:Mach:3*:*) + GUESS=i386-pc-mach3 + ;; + paragon:*:*:*) + GUESS=i860-intel-osf1 + ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 + fi + ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + GUESS=m68010-convergent-sysv + ;; + mc68k:UNIX:SYSTEM5:3.51m) + GUESS=m68k-convergent-sysv + ;; + M680?0:D-NIX:5.3:*) + GUESS=m68k-diab-dnix + ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + GUESS=m68k-unknown-lynxos$UNAME_RELEASE + ;; + mc68030:UNIX_System_V:4.*:*) + GUESS=m68k-atari-sysv4 + ;; + TSUNAMI:LynxOS:2.*:*) + 
GUESS=sparc-unknown-lynxos$UNAME_RELEASE + ;; + rs6000:LynxOS:2.*:*) + GUESS=rs6000-unknown-lynxos$UNAME_RELEASE + ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + GUESS=powerpc-unknown-lynxos$UNAME_RELEASE + ;; + SM[BE]S:UNIX_SV:*:*) + GUESS=mips-dde-sysv$UNAME_RELEASE + ;; + RM*:ReliantUNIX-*:*:*) + GUESS=mips-sni-sysv4 + ;; + RM*:SINIX-*:*:*) + GUESS=mips-sni-sysv4 + ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + GUESS=$UNAME_MACHINE-sni-sysv4 + else + GUESS=ns32k-sni-sysv + fi + ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + GUESS=i586-unisys-sysv4 + ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + GUESS=hppa1.1-stratus-sysv4 + ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + GUESS=i860-stratus-sysv4 + ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + GUESS=$UNAME_MACHINE-stratus-vos + ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + GUESS=hppa1.1-stratus-vos + ;; + mc68*:A/UX:*:*) + GUESS=m68k-apple-aux$UNAME_RELEASE + ;; + news*:NEWS-OS:6*:*) + GUESS=mips-sony-newsos6 + ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if test -d /usr/nec; then + GUESS=mips-nec-sysv$UNAME_RELEASE + else + GUESS=mips-unknown-sysv$UNAME_RELEASE + fi + ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + GUESS=powerpc-be-beos + ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + GUESS=powerpc-apple-beos + ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + GUESS=i586-pc-beos + ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+ GUESS=i586-pc-haiku + ;; + x86_64:Haiku:*:*) + GUESS=x86_64-unknown-haiku + ;; + SX-4:SUPER-UX:*:*) + GUESS=sx4-nec-superux$UNAME_RELEASE + ;; + SX-5:SUPER-UX:*:*) + GUESS=sx5-nec-superux$UNAME_RELEASE + ;; + SX-6:SUPER-UX:*:*) + GUESS=sx6-nec-superux$UNAME_RELEASE + ;; + SX-7:SUPER-UX:*:*) + GUESS=sx7-nec-superux$UNAME_RELEASE + ;; + SX-8:SUPER-UX:*:*) + GUESS=sx8-nec-superux$UNAME_RELEASE + ;; + SX-8R:SUPER-UX:*:*) + GUESS=sx8r-nec-superux$UNAME_RELEASE + ;; + SX-ACE:SUPER-UX:*:*) + GUESS=sxace-nec-superux$UNAME_RELEASE + ;; + Power*:Rhapsody:*:*) + GUESS=powerpc-apple-rhapsody$UNAME_RELEASE + ;; + *:Rhapsody:*:*) + GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE + ;; + arm64:Darwin:*:*) + GUESS=aarch64-apple-darwin$UNAME_RELEASE + ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. 
+ CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build + fi + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc + fi + elif test "$UNAME_PROCESSOR" = i386 ; then + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE + fi + GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE + ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = x86; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE + ;; + *:QNX:*:4*) + GUESS=i386-pc-qnx + ;; + NEO-*:NONSTOP_KERNEL:*:*) + GUESS=neo-tandem-nsk$UNAME_RELEASE + ;; + NSE-*:NONSTOP_KERNEL:*:*) + GUESS=nse-tandem-nsk$UNAME_RELEASE + ;; + NSR-*:NONSTOP_KERNEL:*:*) + GUESS=nsr-tandem-nsk$UNAME_RELEASE + ;; + NSV-*:NONSTOP_KERNEL:*:*) + GUESS=nsv-tandem-nsk$UNAME_RELEASE + ;; + NSX-*:NONSTOP_KERNEL:*:*) + GUESS=nsx-tandem-nsk$UNAME_RELEASE + ;; + *:NonStop-UX:*:*) + GUESS=mips-compaq-nonstopux + ;; + BS2000:POSIX*:*:*) + GUESS=bs2000-siemens-sysv + ;; + DS/*:UNIX_System_V:*:*) + GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE + ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "${cputype-}" = 386; then + UNAME_MACHINE=i386 + elif test "x${cputype-}" != x; then + UNAME_MACHINE=$cputype + fi + GUESS=$UNAME_MACHINE-unknown-plan9 + ;; + *:TOPS-10:*:*) + GUESS=pdp10-unknown-tops10 + ;; + *:TENEX:*:*) + GUESS=pdp10-unknown-tenex + ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + GUESS=pdp10-dec-tops20 + ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + GUESS=pdp10-xkl-tops20 + ;; + *:TOPS-20:*:*) + GUESS=pdp10-unknown-tops20 + ;; + *:ITS:*:*) + GUESS=pdp10-unknown-its + ;; + SEI:*:*:SEIUX) + GUESS=mips-sei-seiux$UNAME_RELEASE + ;; + *:DragonFly:*:*) + DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL + ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case $UNAME_MACHINE in + A*) GUESS=alpha-dec-vms ;; + I*) GUESS=ia64-dec-vms ;; + V*) GUESS=vax-dec-vms ;; + esac ;; + *:XENIX:*:SysV) + GUESS=i386-pc-xenix + ;; + i*86:skyos:*:*) + SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` + GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL + ;; + i*86:rdos:*:*) + GUESS=$UNAME_MACHINE-pc-rdos + ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; + *:AROS:*:*) + GUESS=$UNAME_MACHINE-unknown-aros + ;; + x86_64:VMkernel:*:*) + GUESS=$UNAME_MACHINE-unknown-esx + ;; + amd64:Isilon\ OneFS:*:*) + GUESS=x86_64-unknown-onefs + ;; + *:Unleashed:*:*) + GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE + ;; +esac + +# Do we have a guess based on uname results? +if test "x$GUESS" != x; then + echo "$GUESS" + exit +fi + +# No uname command or uname output not recognized. 
+set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case $UNAME_MACHINE:$UNAME_SYSTEM in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 <&2 </dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" +EOF +fi + +exit 1 + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/3rd/pcre2/config.sub b/3rd/pcre2/config.sub new file mode 100644 index 00000000..dba16e84 --- /dev/null +++ b/3rd/pcre2/config.sub @@ -0,0 +1,1890 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright 1992-2022 Free Software Foundation, Inc. + +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2022-01-03' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that +# program. This Exception is an additional permission under section 7 +# of the GNU General Public License, version 3 ("GPLv3"). + + +# Please send patches to . +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# You can get the latest version of this script from: +# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. 
+ +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS + +Canonicalize a configuration name. + +Options: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.sub ($timestamp) + +Copyright 1992-2022 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + + *local*) + # First pass through any local machine types. 
+ echo "$1" + exit ;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Split fields of configuration type +# shellcheck disable=SC2162 +saved_IFS=$IFS +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 + ;; + *-*-*-*) + basic_machine=$field1-$field2 + basic_os=$field3-$field4 + ;; + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + basic_os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + basic_os=linux-android + ;; + *) + basic_machine=$field1-$field2 + basic_os=$field3 + ;; + esac + ;; + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + basic_os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + basic_os=$field2 + ;; + zephyr*) + basic_machine=$field1-unknown + basic_os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + basic_os= + ;; + *) + basic_machine=$field1 + basic_os=$field2 + ;; + esac + ;; + esac + ;; + 
*) + # Convert single-component short-hands not valid as part of + # multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + basic_os=bsd + ;; + a29khif) + basic_machine=a29k-amd + basic_os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + basic_os=scout + ;; + alliant) + basic_machine=fx80-alliant + basic_os= + ;; + altos | altos3068) + basic_machine=m68k-altos + basic_os= + ;; + am29k) + basic_machine=a29k-none + basic_os=bsd + ;; + amdahl) + basic_machine=580-amdahl + basic_os=sysv + ;; + amiga) + basic_machine=m68k-unknown + basic_os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + basic_os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + basic_os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + basic_os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + basic_os=bsd + ;; + aros) + basic_machine=i386-pc + basic_os=aros + ;; + aux) + basic_machine=m68k-apple + basic_os=aux + ;; + balance) + basic_machine=ns32k-sequent + basic_os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + basic_os=linux + ;; + cegcc) + basic_machine=arm-unknown + basic_os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + basic_os=bsd + ;; + convex-c2) + basic_machine=c2-convex + basic_os=bsd + ;; + convex-c32) + basic_machine=c32-convex + basic_os=bsd + ;; + convex-c34) + basic_machine=c34-convex + basic_os=bsd + ;; + convex-c38) + basic_machine=c38-convex + basic_os=bsd + ;; + cray) + basic_machine=j90-cray + basic_os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + basic_os= + ;; + da30) + basic_machine=m68k-da30 + basic_os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + basic_os= + ;; + delta88) + basic_machine=m88k-motorola + basic_os=sysv3 + ;; + dicos) + basic_machine=i686-pc + basic_os=dicos + ;; + djgpp) + basic_machine=i586-pc + basic_os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + basic_os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + 
basic_machine=m68k-ericsson + basic_os=ose + ;; + gmicro) + basic_machine=tron-gmicro + basic_os=sysv + ;; + go32) + basic_machine=i386-pc + basic_os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + basic_os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + basic_os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + basic_os=hms + ;; + harris) + basic_machine=m88k-harris + basic_os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + basic_os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + basic_os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + basic_os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + basic_os=proelf + ;; + i386mach) + basic_machine=i386-mach + basic_os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + basic_os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + basic_os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + basic_os=sysv + ;; + merlin) + basic_machine=ns32k-utek + basic_os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + basic_os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + basic_os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + basic_os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + basic_os=coff + ;; + morphos) + basic_machine=powerpc-unknown + basic_os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + basic_os=moxiebox + ;; + msdos) + basic_machine=i386-pc + basic_os=msdos + ;; + msys) + basic_machine=i686-pc + basic_os=msys + ;; + mvs) + basic_machine=i370-ibm + basic_os=mvs + ;; + nacl) + basic_machine=le32-unknown + basic_os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + basic_os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + basic_os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + basic_os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + basic_os=newsos + ;; + news1000) + basic_machine=m68030-sony + basic_os=newsos + ;; + necv70) + basic_machine=v70-nec + basic_os=sysv + ;; + nh3000) + basic_machine=m68k-harris + basic_os=cxux + ;; 
+ nh[45]000) + basic_machine=m88k-harris + basic_os=cxux + ;; + nindy960) + basic_machine=i960-intel + basic_os=nindy + ;; + mon960) + basic_machine=i960-intel + basic_os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + basic_os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + basic_os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + basic_os=ose + ;; + os68k) + basic_machine=m68k-none + basic_os=os68k + ;; + paragon) + basic_machine=i860-intel + basic_os=osf + ;; + parisc) + basic_machine=hppa-unknown + basic_os=linux + ;; + psp) + basic_machine=mipsallegrexel-sony + basic_os=psp + ;; + pw32) + basic_machine=i586-unknown + basic_os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + basic_os=rdos + ;; + rdos32) + basic_machine=i386-pc + basic_os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + basic_os=coff + ;; + sa29200) + basic_machine=a29k-amd + basic_os=udi + ;; + sei) + basic_machine=mips-sei + basic_os=seiux + ;; + sequent) + basic_machine=i386-sequent + basic_os= + ;; + sps7) + basic_machine=m68k-bull + basic_os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + basic_os= + ;; + stratus) + basic_machine=i860-stratus + basic_os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + basic_os= + ;; + sun2os3) + basic_machine=m68000-sun + basic_os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + basic_os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + basic_os= + ;; + sun3os3) + basic_machine=m68k-sun + basic_os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + basic_os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + basic_os= + ;; + sun4os3) + basic_machine=sparc-sun + basic_os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + basic_os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + basic_os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + basic_os= + ;; + sv1) + basic_machine=sv1-cray + basic_os=unicos + ;; + symmetry) + basic_machine=i386-sequent + basic_os=dynix + ;; + t3e) + 
basic_machine=alphaev5-cray + basic_os=unicos + ;; + t90) + basic_machine=t90-cray + basic_os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + basic_os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + basic_os=tpf + ;; + udi29k) + basic_machine=a29k-amd + basic_os=udi + ;; + ultra3) + basic_machine=a29k-nyu + basic_os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + basic_os=none + ;; + vaxv) + basic_machine=vax-dec + basic_os=sysv + ;; + vms) + basic_machine=vax-dec + basic_os=vms + ;; + vsta) + basic_machine=i386-pc + basic_os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + basic_os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + basic_os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + basic_os=vxworks + ;; + xbox) + basic_machine=i686-pc + basic_os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + basic_os=unicos + ;; + *) + basic_machine=$1 + basic_os= + ;; + esac + ;; +esac + +# Decode 1-component or ad-hoc basic machines +case $basic_machine in + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond + ;; + op50n) + cpu=hppa1.1 + vendor=oki + ;; + op60c) + cpu=hppa1.1 + vendor=oki + ;; + ibm*) + cpu=i370 + vendor=ibm + ;; + orion105) + cpu=clipper + vendor=highlevel + ;; + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple + ;; + pmac | pmac-mpw) + cpu=powerpc + vendor=apple + ;; + + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + cpu=m68000 + vendor=att + ;; + 3b*) + cpu=we32k + vendor=att + ;; + bluegene*) + cpu=powerpc + vendor=ibm + basic_os=cnk + ;; + decsystem10* | dec10*) + cpu=pdp10 + vendor=dec + basic_os=tops10 + ;; + decsystem20* | dec20*) + cpu=pdp10 + vendor=dec + basic_os=tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + cpu=m68k + vendor=motorola + ;; + dpx2*) + cpu=m68k + vendor=bull + basic_os=sysv3 + ;; + encore | umax | mmax) + cpu=ns32k + vendor=encore + ;; + elxsi) + cpu=elxsi + vendor=elxsi + basic_os=${basic_os:-bsd} + ;; + fx2800) + cpu=i860 + vendor=alliant + ;; + genix) + cpu=ns32k + vendor=ns + ;; + h3050r* | hiux*) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + cpu=m68000 + vendor=hp + ;; + hp9k3[2-9][0-9]) + cpu=m68k + vendor=hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + i*86v32) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv32 + ;; + i*86v4*) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv4 + ;; + i*86v) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=sysv + ;; + i*86sol2) + cpu=`echo "$1" | sed -e 's/86.*/86/'` + vendor=pc + basic_os=solaris2 + ;; + j90 | j90-cray) + cpu=j90 + vendor=cray + basic_os=${basic_os:-unicos} + ;; + iris | iris4d) + cpu=mips + vendor=sgi + case $basic_os in + irix*) + ;; + *) + basic_os=irix4 + ;; 
+ esac + ;; + miniframe) + cpu=m68000 + vendor=convergent + ;; + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + basic_os=mint + ;; + news-3600 | risc-news) + cpu=mips + vendor=sony + basic_os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $basic_os in + openstep*) + ;; + nextstep*) + ;; + ns2*) + basic_os=nextstep2 + ;; + *) + basic_os=nextstep3 + ;; + esac + ;; + np1) + cpu=np1 + vendor=gould + ;; + op50n-* | op60c-*) + cpu=hppa1.1 + vendor=oki + basic_os=proelf + ;; + pa-hitachi) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + pbd) + cpu=sparc + vendor=tti + ;; + pbb) + cpu=m68k + vendor=tti + ;; + pc532) + cpu=ns32k + vendor=pc532 + ;; + pn) + cpu=pn + vendor=gould + ;; + power) + cpu=power + vendor=ibm + ;; + ps2) + cpu=i386 + vendor=ibm + ;; + rm[46]00) + cpu=mips + vendor=siemens + ;; + rtpc | rtpc-*) + cpu=romp + vendor=ibm + ;; + sde) + cpu=mipsisa32 + vendor=sde + basic_os=${basic_os:-elf} + ;; + simso-wrs) + cpu=sparclite + vendor=wrs + basic_os=vxworks + ;; + tower | tower-32) + cpu=m68k + vendor=ncr + ;; + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu + ;; + w65) + cpu=w65 + vendor=wdc + ;; + w89k-*) + cpu=hppa1.1 + vendor=winbond + basic_os=proelf + ;; + none) + cpu=none + vendor=none + ;; + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine + ;; + leon-*|leon[3-9]-*) + cpu=sparc + vendor=`echo "$basic_machine" | sed 's/-.*//'` + ;; + + *-*) + # shellcheck disable=SC2162 + saved_IFS=$IFS + IFS="-" read cpu vendor <&2 + exit 1 + ;; + esac + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $vendor in + digital*) + vendor=dec + ;; + commodore*) + vendor=cbm + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if test x$basic_os != x +then + +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just +# set os. 
+case $basic_os in + gnu/linux*) + kernel=linux + os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` + ;; + os2-emx) + kernel=os2 + os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` + ;; + nto-qnx*) + kernel=nto + os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` + ;; + *-*) + # shellcheck disable=SC2162 + saved_IFS=$IFS + IFS="-" read kernel os <&2 + exit 1 + ;; +esac + +# As a final step for OS-related things, validate the OS-kernel combination +# (given a valid OS), if there is a kernel. +case $kernel-$os in + linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \ + | linux-musl* | linux-relibc* | linux-uclibc* ) + ;; + uclinux-uclibc* ) + ;; + -dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* ) + # These are just libc implementations, not actual OSes, and thus + # require a kernel. + echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + exit 1 + ;; + kfreebsd*-gnu* | kopensolaris*-gnu*) + ;; + vxworks-simlinux | vxworks-simwindows | vxworks-spe) + ;; + nto-qnx*) + ;; + os2-emx) + ;; + *-eabi* | *-gnueabi*) + ;; + -*) + # Blank kernel with real OS is always fine. + ;; + *-*) + echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + exit 1 + ;; +esac + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+case $vendor in + unknown) + case $cpu-$os in + *-riscix*) + vendor=acorn + ;; + *-sunos*) + vendor=sun + ;; + *-cnk* | *-aix*) + vendor=ibm + ;; + *-beos*) + vendor=be + ;; + *-hpux*) + vendor=hp + ;; + *-mpeix*) + vendor=hp + ;; + *-hiux*) + vendor=hitachi + ;; + *-unos*) + vendor=crds + ;; + *-dgux*) + vendor=dg + ;; + *-luna*) + vendor=omron + ;; + *-genix*) + vendor=ns + ;; + *-clix*) + vendor=intergraph + ;; + *-mvs* | *-opened*) + vendor=ibm + ;; + *-os400*) + vendor=ibm + ;; + s390-* | s390x-*) + vendor=ibm + ;; + *-ptx*) + vendor=sequent + ;; + *-tpf*) + vendor=ibm + ;; + *-vxsim* | *-vxworks* | *-windiss*) + vendor=wrs + ;; + *-aux*) + vendor=apple + ;; + *-hms*) + vendor=hitachi + ;; + *-mpw* | *-macos*) + vendor=apple + ;; + *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) + vendor=atari + ;; + *-vos*) + vendor=stratus + ;; + esac + ;; +esac + +echo "$cpu-$vendor-${kernel:+$kernel-}$os" +exit + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/3rd/pcre2/configure b/3rd/pcre2/configure new file mode 100644 index 00000000..6d202d65 --- /dev/null +++ b/3rd/pcre2/configure @@ -0,0 +1,19139 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.71 for PCRE2 10.45. +# +# +# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, +# Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else $as_nop + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator. 
+as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else \$as_nop + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : + +else \$as_nop + exitcode=1; echo positional parameters were not saved. +fi +test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 + + test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ + || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null +then : + as_have_required=yes +else $as_nop + as_have_required=no +fi + if test x$as_have_required = xyes && 
(eval "$as_suggested") 2>/dev/null +then : + +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. + as_shell=$as_dir$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$as_shell as_have_required=yes + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : + break 2 +fi +fi + done;; + esac + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi +fi + + + if test "x$CONFIG_SHELL" != x +then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." 
+ if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." + else + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, +$0: including any error possibly output before this +$0: message. Then install a modern shell, or manually run +$0: the script under such a shell if you do have one." + fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else $as_nop + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else $as_nop + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. 
+ # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + +SHELL=${CONFIG_SHELL-/bin/sh} + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='PCRE2' +PACKAGE_TARNAME='pcre2' +PACKAGE_VERSION='10.45' +PACKAGE_STRING='PCRE2 10.45' +PACKAGE_BUGREPORT='' +PACKAGE_URL='' + +ac_unique_file="src/pcre2.h.in" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_STDIO_H +# include +#endif +#ifdef HAVE_STDLIB_H +# include +#endif +#ifdef HAVE_STRING_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_header_c_list= +ac_subst_vars='am__EXEEXT_FALSE +am__EXEEXT_TRUE +LTLIBOBJS +LIBOBJS +LIB_POSTFIX +CET_CFLAGS +WITH_GCOV_FALSE +WITH_GCOV_TRUE +GCOV_LIBS +GCOV_CXXFLAGS +GCOV_CFLAGS +GENHTML +LCOV +SHTOOL +VALGRIND_LIBS +VALGRIND_CFLAGS +PKG_CONFIG_LIBDIR +PKG_CONFIG_PATH +PKG_CONFIG +LIBBZ2 +LIBZ +DISTCHECK_CONFIGURE_FLAGS +EXTRA_LIBPCRE2_POSIX_LDFLAGS +EXTRA_LIBPCRE2_32_LDFLAGS +EXTRA_LIBPCRE2_16_LDFLAGS +EXTRA_LIBPCRE2_8_LDFLAGS +PTHREAD_CFLAGS +PTHREAD_LIBS +PTHREAD_CC +ax_pthread_config +PCRE2POSIX_CFLAG +PCRE2_STATIC_CFLAG +LIBREADLINE +WITH_DIFF_FUZZ_SUPPORT_FALSE +WITH_DIFF_FUZZ_SUPPORT_TRUE +WITH_FUZZ_SUPPORT_FALSE +WITH_FUZZ_SUPPORT_TRUE +WITH_VALGRIND_FALSE +WITH_VALGRIND_TRUE +WITH_UNICODE_FALSE +WITH_UNICODE_TRUE +WITH_JIT_FALSE +WITH_JIT_TRUE +WITH_REBUILD_CHARTABLES_FALSE +WITH_REBUILD_CHARTABLES_TRUE +WITH_PCRE2_32_FALSE +WITH_PCRE2_32_TRUE +WITH_PCRE2_16_FALSE +WITH_PCRE2_16_TRUE +WITH_PCRE2_8_FALSE +WITH_PCRE2_8_TRUE +enable_pcre2_32 +enable_pcre2_16 +enable_pcre2_8 +PCRE2_DATE +PCRE2_PRERELEASE +PCRE2_MINOR +PCRE2_MAJOR +HAVE_VISIBILITY +VISIBILITY_CFLAGS +LT_SYS_LIBRARY_PATH +OTOOL64 +OTOOL +LIPO +NMEDIT +DSYMUTIL +MANIFEST_TOOL +RANLIB +FILECMD +LN_S +NM +ac_ct_DUMPBIN +DUMPBIN +LD +FGREP +EGREP +GREP +SED +host_os +host_vendor +host_cpu +host +build_os +build_vendor +build_cpu +build +LIBTOOL +OBJDUMP +DLLTOOL +AS +ac_ct_AR +AR +am__fastdepCC_FALSE +am__fastdepCC_TRUE +CCDEPMODE +am__nodep +AMDEPBACKSLASH +AMDEP_FALSE +AMDEP_TRUE +am__include +DEPDIR +OBJEXT +EXEEXT +ac_ct_CC +CPPFLAGS +LDFLAGS +CFLAGS +CC +MAINT 
+MAINTAINER_MODE_FALSE +MAINTAINER_MODE_TRUE +AM_BACKSLASH +AM_DEFAULT_VERBOSITY +AM_DEFAULT_V +AM_V +CSCOPE +ETAGS +CTAGS +am__untar +am__tar +AMTAR +am__leading_dot +SET_MAKE +AWK +mkdir_p +MKDIR_P +INSTALL_STRIP_PROGRAM +STRIP +install_sh +MAKEINFO +AUTOHEADER +AUTOMAKE +AUTOCONF +ACLOCAL +VERSION +PACKAGE +CYGPATH_W +am__isrc +INSTALL_DATA +INSTALL_SCRIPT +INSTALL_PROGRAM +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL +am__quote' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +enable_silent_rules +enable_maintainer_mode +enable_dependency_tracking +enable_shared +enable_static +with_pic +enable_fast_install +with_aix_soname +with_gnu_ld +with_sysroot +enable_libtool_lock +enable_largefile +enable_pcre8 +enable_pcre16 +enable_pcre32 +enable_pcre2_8 +enable_pcre2_16 +enable_pcre2_32 +enable_debug +enable_jit +enable_jit_sealloc +enable_pcre2grep_jit +enable_pcre2grep_callout +enable_pcre2grep_callout_fork +enable_rebuild_chartables +enable_unicode +enable_newline_is_cr +enable_newline_is_lf +enable_newline_is_crlf +enable_newline_is_anycrlf +enable_newline_is_any +enable_newline_is_nul +enable_bsr_anycrlf +enable_never_backslash_C +enable_ebcdic +enable_ebcdic_nl25 +enable_pcre2grep_libz +enable_pcre2grep_libbz2 +with_pcre2grep_bufsize +with_pcre2grep_max_bufsize +enable_pcre2test_libedit +enable_pcre2test_libreadline +with_link_size +with_max_varlookbehind +with_parens_nest_limit +with_heap_limit +with_match_limit +with_match_limit_depth +with_match_limit_recursion +enable_valgrind +enable_coverage +enable_fuzz_support +enable_diff_fuzz_support 
+enable_stack_for_recursion +enable_percent_zt +' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +LIBS +CPPFLAGS +LT_SYS_LIBRARY_PATH +PKG_CONFIG +PKG_CONFIG_PATH +PKG_CONFIG_LIBDIR +VALGRIND_CFLAGS +VALGRIND_LIBS +LCOV +GENHTML' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. 
+ if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid package name: \`$ac_useropt'" + ac_useropt_orig=$ac_useropt + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? 
"missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. 
+if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures PCRE2 10.45 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/pcre2] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +Program names: + --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM run sed PROGRAM on installed program names + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of PCRE2 10.45:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-option-checking ignore unrecognized --enable/--with options + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-silent-rules less verbose build 
output (undo: "make V=1") + --disable-silent-rules verbose build output (undo: "make V=0") + --enable-maintainer-mode + enable make rules and dependencies not useful (and + sometimes confusing) to the casual installer + --enable-dependency-tracking + do not reject slow dependency extractors + --disable-dependency-tracking + speeds up one-time build + --enable-shared[=PKGS] build shared libraries [default=yes] + --enable-static[=PKGS] build static libraries [default=yes] + --enable-fast-install[=PKGS] + optimize for fast installation [default=yes] + --disable-libtool-lock avoid locking (might break parallel builds) + --disable-largefile omit support for large files + + --disable-pcre2-8 disable 8 bit character support + --enable-pcre2-16 enable 16 bit character support + --enable-pcre2-32 enable 32 bit character support + --enable-debug enable debugging code + --enable-jit enable Just-In-Time compiling support + --enable-jit-sealloc enable SELinux compatible execmem allocator in JIT + (experimental) + --disable-pcre2grep-jit disable JIT support in pcre2grep + --disable-pcre2grep-callout + disable callout script support in pcre2grep + --disable-pcre2grep-callout-fork + disable callout script fork support in pcre2grep + --enable-rebuild-chartables + rebuild character tables in current locale + --disable-unicode disable Unicode support + --enable-newline-is-cr use CR as newline character + --enable-newline-is-lf use LF as newline character (default) + --enable-newline-is-crlf + use CRLF as newline sequence + --enable-newline-is-anycrlf + use CR, LF, or CRLF as newline sequence + --enable-newline-is-any use any valid Unicode newline sequence + --enable-newline-is-nul use NUL (binary zero) as newline character + --enable-bsr-anycrlf \R matches only CR, LF, CRLF by default + --enable-never-backslash-C + use of \C causes an error + --enable-ebcdic assume EBCDIC coding rather than ASCII; incompatible + with --enable-unicode; use only in (uncommon) EBCDIC + environments; it 
implies --enable-rebuild-chartables + --enable-ebcdic-nl25 set EBCDIC code for NL to 0x25 instead of 0x15; it + implies --enable-ebcdic + --enable-pcre2grep-libz link pcre2grep with libz to handle .gz files + --enable-pcre2grep-libbz2 + link pcre2grep with libbz2 to handle .bz2 files + --enable-pcre2test-libedit + link pcre2test with libedit + --enable-pcre2test-libreadline + link pcre2test with libreadline + --enable-valgrind enable valgrind support + --enable-coverage enable code coverage reports using gcov + --enable-fuzz-support enable fuzzer support + --enable-diff-fuzz-support + enable differential fuzzer support + --disable-percent-zt disable the use of z and t formatting modifiers + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use + both] + --with-aix-soname=aix|svr4|both + shared library versioning (aka "SONAME") variant to + provide on AIX, [default=aix]. + --with-gnu-ld assume the C compiler uses GNU ld [default=no] + --with-sysroot[=DIR] Search for dependent libraries within DIR (or the + compiler's sysroot if not specified). + --with-pcre2grep-bufsize=N + pcre2grep initial buffer size (default=20480, + minimum=8192) + --with-pcre2grep-max-bufsize=N + pcre2grep maximum buffer size (default=1048576, + minimum=8192) + --with-link-size=N internal link size (2, 3, or 4 allowed; default=2) + --with-max-varlookbehind=N + maximum length of variable lookbehind (default=255) + --with-parens-nest-limit=N + nested parentheses limit (default=250) + --with-heap-limit=N default limit on heap memory (kibibytes, + default=20000000) + --with-match-limit=N default limit on internal looping (default=10000000) + --with-match-limit-depth=N + default limit on match tree depth + (default=MATCH_LIMIT) + + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. 
-L<lib dir> if you have libraries in a + nonstandard directory <lib dir> + LIBS libraries to pass to the linker, e.g. -l<library> + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory <include dir> + LT_SYS_LIBRARY_PATH + User-defined run-time library search path. + PKG_CONFIG path to pkg-config utility + PKG_CONFIG_PATH + directories to add to pkg-config's search path + PKG_CONFIG_LIBDIR + path overriding pkg-config's built-in search path + VALGRIND_CFLAGS + C compiler flags for VALGRIND, overriding pkg-config + VALGRIND_LIBS + linker flags for VALGRIND, overriding pkg-config + LCOV the ltp lcov program + GENHTML the ltp genhtml program + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to the package provider. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. 
+ ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +PCRE2 configure 10.45 +generated by GNU Autoconf 2.71 + +Copyright (C) 2021 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_find_intX_t LINENO BITS VAR +# ----------------------------------- +# Finds a signed integer type with width BITS, setting cache variable VAR +# accordingly. 
+ac_fn_c_find_intX_t () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for int$2_t" >&5 +printf %s "checking for int$2_t... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + eval "$3=no" + # Order is important - never check a type that is potentially smaller + # than half of the expected target width. + for ac_type in int$2_t 'int' 'long int' \ + 'long long int' 'short int' 'signed char'; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default + enum { N = $2 / 2 - 1 }; +int +main (void) +{ +static int test_array [1 - 2 * !(0 < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1))]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default + enum { N = $2 / 2 - 1 }; +int +main (void) +{ +static int test_array [1 - 2 * !(($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1) + < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 2))]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + case $ac_type in #( + int$2_t) : + eval "$3=yes" ;; #( + *) : + eval "$3=\$ac_type" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + if eval test \"x\$"$3"\" = x"no" +then : + +else $as_nop + break +fi + done +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_find_intX_t + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. 
+ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + } +then : + ac_retval=0 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +/* Define $2 to an innocuous variant, in case <limits.h> declares $2. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. */ + +#include <limits.h> +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main (void) +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + eval "$3=yes" +else $as_nop + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. +ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +printf %s "checking for $2... " >&6; } +if eval test \${$3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$4 +int +main (void) +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main (void) +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +eval ac_res=\$$3 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. + ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by PCRE2 $as_me 10.45, which was +generated by GNU Autoconf 2.71. Invocation command line was + + $ $0$ac_configure_args_raw + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. 
## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. 
+ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" + # Save into config.log some information that might help in debugging. + { + echo + + printf "%s\n" "## ---------------- ## +## Cache variables. 
## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + printf "%s\n" "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + printf "%s\n" "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + printf "%s\n" "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + printf "%s\n" "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +printf "%s\n" "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h + +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + ac_site_files="$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" +else + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" +fi + +for ac_site_file in $ac_site_files +do + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . 
"$ac_site_file" \ + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Test code for whether the C compiler supports C89 (global declarations) +ac_c_conftest_c89_globals=' +/* Does the compiler advertise C89 conformance? + Do not test the value of __STDC__, because some compilers set it to 0 + while being otherwise adequately conformant. */ +#if !defined __STDC__ +# error "Compiler does not advertise C89 conformance" +#endif + +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7 src/conf.sh. */ +struct buf { int x; }; +struct buf * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not \xHH hex character constants. + These do not provoke an error unfortunately, instead are silently treated + as an "x". 
The following induces an error, until -std is added to get + proper ANSI mode. Curiously \x00 != x always comes out true, for an + array size at least. It is necessary to write \x00 == 0 to get something + that is true only with -std. */ +int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) '\''x'\'' +int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int), + int, int);' + +# Test code for whether the C compiler supports C89 (body of main). +ac_c_conftest_c89_main=' +ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]); +' + +# Test code for whether the C compiler supports C99 (global declarations) +ac_c_conftest_c99_globals=' +// Does the compiler advertise C99 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L +# error "Compiler does not advertise C99 conformance" +#endif + +#include +extern int puts (const char *); +extern int printf (const char *, ...); +extern int dprintf (int, const char *, ...); +extern void *malloc (size_t); + +// Check varargs macros. These examples are taken from C99 6.10.3.5. +// dprintf is used instead of fprintf to avoid needing to declare +// FILE and stderr. +#define debug(...) dprintf (2, __VA_ARGS__) +#define showlist(...) puts (#__VA_ARGS__) +#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) +static void +test_varargs_macros (void) +{ + int x = 1234; + int y = 5678; + debug ("Flag"); + debug ("X = %d\n", x); + showlist (The first, second, and third items.); + report (x>y, "x is %d but y is %d", x, y); +} + +// Check long long types. 
+#define BIG64 18446744073709551615ull +#define BIG32 4294967295ul +#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) +#if !BIG_OK + #error "your preprocessor is broken" +#endif +#if BIG_OK +#else + #error "your preprocessor is broken" +#endif +static long long int bignum = -9223372036854775807LL; +static unsigned long long int ubignum = BIG64; + +struct incomplete_array +{ + int datasize; + double data[]; +}; + +struct named_init { + int number; + const wchar_t *name; + double average; +}; + +typedef const char *ccp; + +static inline int +test_restrict (ccp restrict text) +{ + // See if C++-style comments work. + // Iterate through items via the restricted pointer. + // Also check for declarations in for loops. + for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i) + continue; + return 0; +} + +// Check varargs and va_copy. +static bool +test_varargs (const char *format, ...) +{ + va_list args; + va_start (args, format); + va_list args_copy; + va_copy (args_copy, args); + + const char *str = ""; + int number = 0; + float fnumber = 0; + + while (*format) + { + switch (*format++) + { + case '\''s'\'': // string + str = va_arg (args_copy, const char *); + break; + case '\''d'\'': // int + number = va_arg (args_copy, int); + break; + case '\''f'\'': // float + fnumber = va_arg (args_copy, double); + break; + default: + break; + } + } + va_end (args_copy); + va_end (args); + + return *str && number && fnumber; +} +' + +# Test code for whether the C compiler supports C99 (body of main). +ac_c_conftest_c99_main=' + // Check bool. + _Bool success = false; + success |= (argc != 0); + + // Check restrict. + if (test_restrict ("String literal") == 0) + success = true; + char *restrict newvar = "Another string"; + + // Check varargs. + success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234); + test_varargs_macros (); + + // Check flexible array members. 
+ struct incomplete_array *ia = + malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); + ia->datasize = 10; + for (int i = 0; i < ia->datasize; ++i) + ia->data[i] = i * 1.234; + + // Check named initializers. + struct named_init ni = { + .number = 34, + .name = L"Test wide string", + .average = 543.34343, + }; + + ni.number = 58; + + int dynamic_array[ni.number]; + dynamic_array[0] = argv[0][0]; + dynamic_array[ni.number - 1] = 543; + + // work around unused variable warnings + ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\'' + || dynamic_array[ni.number - 1] != 543); +' + +# Test code for whether the C compiler supports C11 (global declarations) +ac_c_conftest_c11_globals=' +// Does the compiler advertise C11 conformance? +#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L +# error "Compiler does not advertise C11 conformance" +#endif + +// Check _Alignas. +char _Alignas (double) aligned_as_double; +char _Alignas (0) no_special_alignment; +extern char aligned_as_int; +char _Alignas (0) _Alignas (int) aligned_as_int; + +// Check _Alignof. +enum +{ + int_alignment = _Alignof (int), + int_array_alignment = _Alignof (int[100]), + char_alignment = _Alignof (char) +}; +_Static_assert (0 < -_Alignof (int), "_Alignof is signed"); + +// Check _Noreturn. +int _Noreturn does_not_return (void) { for (;;) continue; } + +// Check _Static_assert. +struct test_static_assert +{ + int x; + _Static_assert (sizeof (int) <= sizeof (long int), + "_Static_assert does not work in struct"); + long int y; +}; + +// Check UTF-8 literals. +#define u8 syntax error! +char const utf8_literal[] = u8"happens to be ASCII" "another string"; + +// Check duplicate typedefs. +typedef long *long_ptr; +typedef long int *long_ptr; +typedef long_ptr long_ptr; + +// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1. 
+struct anonymous +{ + union { + struct { int i; int j; }; + struct { int k; long int l; } w; + }; + int m; +} v1; +' + +# Test code for whether the C compiler supports C11 (body of main). +ac_c_conftest_c11_main=' + _Static_assert ((offsetof (struct anonymous, i) + == offsetof (struct anonymous, w.k)), + "Anonymous union alignment botch"); + v1.i = 2; + v1.w.k = 5; + ok |= v1.i != 5; +' + +# Test code for whether the C compiler supports C11 (complete). +ac_c_conftest_c11_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} +${ac_c_conftest_c11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + ${ac_c_conftest_c11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C99 (complete). +ac_c_conftest_c99_program="${ac_c_conftest_c89_globals} +${ac_c_conftest_c99_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + ${ac_c_conftest_c99_main} + return ok; +} +" + +# Test code for whether the C compiler supports C89 (complete). 
+ac_c_conftest_c89_program="${ac_c_conftest_c89_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_c_conftest_c89_main} + return ok; +} +" + +as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H" +as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H" +as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H" +as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H" +as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H" +as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H" +as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H" +as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H" +as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H" +as_fn_append ac_header_c_list " wchar.h wchar_h HAVE_WCHAR_H" +as_fn_append ac_header_c_list " minix/config.h minix_config_h HAVE_MINIX_CONFIG_H" + +# Auxiliary files required by this configure script. +ac_aux_files="config.guess config.sub ltmain.sh ar-lib compile missing install-sh" + +# Locations in which to look for auxiliary files. +ac_aux_dir_candidates="${srcdir}${PATH_SEPARATOR}${srcdir}/..${PATH_SEPARATOR}${srcdir}/../.." + +# Search for a directory containing all of the required auxiliary files, +# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates. +# If we don't find one directory that contains all the files we need, +# we report the set of missing files from the *first* directory in +# $ac_aux_dir_candidates and give up. 
+ac_missing_aux_files="" +ac_first_candidate=: +printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: $ac_aux_files" >&5 +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in $ac_aux_dir_candidates +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + as_found=: + + printf "%s\n" "$as_me:${as_lineno-$LINENO}: trying $as_dir" >&5 + ac_aux_dir_found=yes + ac_install_sh= + for ac_aux in $ac_aux_files + do + # As a special case, if "install-sh" is required, that requirement + # can be satisfied by any of "install-sh", "install.sh", or "shtool", + # and $ac_install_sh is set appropriately for whichever one is found. + if test x"$ac_aux" = x"install-sh" + then + if test -f "${as_dir}install-sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install-sh found" >&5 + ac_install_sh="${as_dir}install-sh -c" + elif test -f "${as_dir}install.sh"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}install.sh found" >&5 + ac_install_sh="${as_dir}install.sh -c" + elif test -f "${as_dir}shtool"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}shtool found" >&5 + ac_install_sh="${as_dir}shtool install -c" + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} install-sh" + else + break + fi + fi + else + if test -f "${as_dir}${ac_aux}"; then + printf "%s\n" "$as_me:${as_lineno-$LINENO}: ${as_dir}${ac_aux} found" >&5 + else + ac_aux_dir_found=no + if $ac_first_candidate; then + ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}" + else + break + fi + fi + fi + done + if test "$ac_aux_dir_found" = yes; then + ac_aux_dir="$as_dir" + break + fi + ac_first_candidate=false + + as_found=false +done +IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + as_fn_error $? 
"cannot find required auxiliary files:$ac_missing_aux_files" "$LINENO" 5 +fi + + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +if test -f "${ac_aux_dir}config.guess"; then + ac_config_guess="$SHELL ${ac_aux_dir}config.guess" +fi +if test -f "${ac_aux_dir}config.sub"; then + ac_config_sub="$SHELL ${ac_aux_dir}config.sub" +fi +if test -f "$ac_aux_dir/configure"; then + ac_configure="$SHELL ${ac_aux_dir}configure" +fi + +# Check that the precious variables saved in the cache have kept the same +# value. +ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. 
+ ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' + and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. 
## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +am__api_version='1.16' + + + + # Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. +# Reject install programs that cannot install multiple files. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 +printf %s "checking for a BSD-compatible install... " >&6; } +if test -z "$INSTALL"; then +if test ${ac_cv_path_install+y} +then : + printf %s "(cached) " >&6 +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + # Account for fact that we put trailing slashes in our PATH walk. +case $as_dir in #(( + ./ | /[cC]/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. 
+ for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext"; then + if test $ac_prog = install && + grep dspmsg "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + rm -rf conftest.one conftest.two conftest.dir + echo one > conftest.one + echo two > conftest.two + mkdir conftest.dir + if "$as_dir$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir/" && + test -s conftest.one && test -s conftest.two && + test -s conftest.dir/conftest.one && + test -s conftest.dir/conftest.two + then + ac_cv_path_install="$as_dir$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + fi + done + done + ;; +esac + + done +IFS=$as_save_IFS + +rm -rf conftest.one conftest.two conftest.dir + +fi + if test ${ac_cv_path_install+y}; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 +printf "%s\n" "$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 +printf %s "checking whether build environment is sane... 
" >&6; } +# Reject unsafe characters in $srcdir or the absolute working directory +# name. Accept space and tab only in the latter. +am_lf=' +' +case `pwd` in + *[\\\"\#\$\&\'\`$am_lf]*) + as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; +esac +case $srcdir in + *[\\\"\#\$\&\'\`$am_lf\ \ ]*) + as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;; +esac + +# Do 'set' in a subshell so we don't clobber the current shell's +# arguments. Must try -L first in case configure is actually a +# symlink; some systems play weird games with the mod time of symlinks +# (eg FreeBSD returns the mod time of the symlink's containing +# directory). +if ( + am_has_slept=no + for am_try in 1 2; do + echo "timestamp, slept: $am_has_slept" > conftest.file + set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` + if test "$*" = "X"; then + # -L didn't work. + set X `ls -t "$srcdir/configure" conftest.file` + fi + if test "$*" != "X $srcdir/configure conftest.file" \ + && test "$*" != "X conftest.file $srcdir/configure"; then + + # If neither matched, then we have a broken ls. This can happen + # if, for instance, CONFIG_SHELL is bash and it inherits a + # broken ls alias from the environment. This has actually + # happened. Such a system could not be considered "sane". + as_fn_error $? "ls -t appears to fail. Make sure there is not a broken + alias in your environment" "$LINENO" 5 + fi + if test "$2" = conftest.file || test $am_try -eq 2; then + break + fi + # Just in case. + sleep 1 + am_has_slept=yes + done + test "$2" = conftest.file + ) +then + # Ok. + : +else + as_fn_error $? "newly created file is older than distributed files! +Check your system clock" "$LINENO" 5 +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +# If we didn't sleep, we still need to ensure time stamps of config.status and +# generated files are strictly newer. 
+am_sleep_pid= +if grep 'slept: no' conftest.file >/dev/null 2>&1; then + ( sleep 1 ) & + am_sleep_pid=$! +fi + +rm -f conftest.file + +test "$program_prefix" != NONE && + program_transform_name="s&^&$program_prefix&;$program_transform_name" +# Use a double $ so make ignores it. +test "$program_suffix" != NONE && + program_transform_name="s&\$&$program_suffix&;$program_transform_name" +# Double any \ or $. +# By default was `s,x,x', remove it if useless. +ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' +program_transform_name=`printf "%s\n" "$program_transform_name" | sed "$ac_script"` + + +# Expand $ac_aux_dir to an absolute path. +am_aux_dir=`cd "$ac_aux_dir" && pwd` + + + if test x"${MISSING+set}" != xset; then + MISSING="\${SHELL} '$am_aux_dir/missing'" +fi +# Use eval to expand $SHELL +if eval "$MISSING --is-lightweight"; then + am_missing_run="$MISSING " +else + am_missing_run= + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5 +printf "%s\n" "$as_me: WARNING: 'missing' script is too old or missing" >&2;} +fi + +if test x"${install_sh+set}" != xset; then + case $am_aux_dir in + *\ * | *\ *) + install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; + *) + install_sh="\${SHELL} $am_aux_dir/install-sh" + esac +fi + +# Installed binaries are usually stripped using 'strip' when the user +# run "make install-strip". However 'strip' might not be the right +# tool to use in cross-compilation environments, therefore Automake +# will honor the 'STRIP' environment variable to overrule this program. +if test "$cross_compiling" != no; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +printf "%s\n" "$STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +printf "%s\n" "$ac_ct_STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +fi +INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a race-free mkdir -p" >&5 +printf %s "checking for a race-free mkdir -p... 
" >&6; } +if test -z "$MKDIR_P"; then + if test ${ac_cv_path_mkdir+y} +then : + printf %s "(cached) " >&6 +else $as_nop + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in mkdir gmkdir; do + for ac_exec_ext in '' $ac_executable_extensions; do + as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue + case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #( + 'mkdir ('*'coreutils) '* | \ + 'BusyBox '* | \ + 'mkdir (fileutils) '4.1*) + ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext + break 3;; + esac + done + done + done +IFS=$as_save_IFS + +fi + + test -d ./--version && rmdir ./--version + if test ${ac_cv_path_mkdir+y}; then + MKDIR_P="$ac_cv_path_mkdir -p" + else + # As a last resort, use the slow shell script. Don't cache a + # value for MKDIR_P within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + MKDIR_P="$ac_install_sh -d" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5 +printf "%s\n" "$MKDIR_P" >&6; } + +for ac_prog in gawk mawk nawk awk +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AWK+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AWK"; then + ac_cv_prog_AWK="$AWK" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AWK="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AWK=$ac_cv_prog_AWK +if test -n "$AWK"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5 +printf "%s\n" "$AWK" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AWK" && break +done + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5 +printf %s "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; } +set x ${MAKE-make} +ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'` +if eval test \${ac_cv_prog_make_${ac_make}_set+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat >conftest.make <<\_ACEOF +SHELL = /bin/sh +all: + @echo '@@@%%%=$(MAKE)=@@@%%%' +_ACEOF +# GNU make sometimes prints "make[1]: Entering ...", which would confuse us. +case `${MAKE-make} -f conftest.make 2>/dev/null` in + *@@@%%%=?*=@@@%%%*) + eval ac_cv_prog_make_${ac_make}_set=yes;; + *) + eval ac_cv_prog_make_${ac_make}_set=no;; +esac +rm -f conftest.make +fi +if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + SET_MAKE= +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + SET_MAKE="MAKE=${MAKE-make}" +fi + +rm -rf .tst 2>/dev/null +mkdir .tst 2>/dev/null +if test -d .tst; then + am__leading_dot=. +else + am__leading_dot=_ +fi +rmdir .tst 2>/dev/null + +# Check whether --enable-silent-rules was given. 
+if test ${enable_silent_rules+y} +then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=1;; +esac +am_make=${MAKE-make} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... " >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +if test "`cd $srcdir && pwd`" != "`pwd`"; then + # Use -I$(srcdir) only when $(srcdir) != ., so that make's output + # is not polluted with repeated "-I." + am__isrc=' -I$(srcdir)' + # test to see if srcdir already configured + if test -f $srcdir/config.status; then + as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5 + fi +fi + +# test whether we have cygpath +if test -z "$CYGPATH_W"; then + if (cygpath --version) >/dev/null 2>/dev/null; then + CYGPATH_W='cygpath -w' + else + CYGPATH_W=echo + fi +fi + + +# Define the identity of the package. + PACKAGE='pcre2' + VERSION='10.45' + + +printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h + + +printf "%s\n" "#define VERSION \"$VERSION\"" >>confdefs.h + +# Some tools Automake needs. 
+ +ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"} + + +AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"} + + +AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"} + + +AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"} + + +MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} + +# For better backward compatibility. To be removed once Automake 1.9.x +# dies out for good. For more background, see: +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> +# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> +mkdir_p='$(MKDIR_P)' + +# We need awk for the "check" target (and possibly the TAP driver). The +# system "awk" is bad on some platforms. +# Always define AMTAR for backward compatibility. Yes, it's still used +# in the wild :-( We should find a proper way to deprecate it ... +AMTAR='$${TAR-tar}' + + +# We'll loop over all known methods to create a tar archive until one works. +_am_tools='gnutar pax cpio none' + +am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -' + + + + + +# Variables for tags utilities; see am/tags.am +if test -z "$CTAGS"; then + CTAGS=ctags +fi + +if test -z "$ETAGS"; then + ETAGS=etags +fi + +if test -z "$CSCOPE"; then + CSCOPE=cscope +fi + + + +# POSIX will say in a future version that running "rm -f" with no argument +# is OK; and we want to be able to make that assumption in our Makefile +# recipes. So use an aggressive probe to check that the usage we want is +# actually supported "in the wild" to an acceptable degree. +# See automake bug#10828. +# To make any issue more visible, cause the running configure to be aborted +# by default if the 'rm' program in use doesn't match our expectations; the +# user can still override this though. +if rm -f && rm -fr && rm -rf; then : OK; else + cat >&2 <<'END' +Oops! + +Your 'rm' program seems unable to run without file operands specified +on the command line, even when the '-f' option is present.
This is contrary +to the behaviour of most rm programs out there, and not conforming with +the upcoming POSIX standard: <https://austingroupbugs.net/view.php?id=542> + +Please tell bug-automake@gnu.org about your system, including the value +of your $PATH and any error possibly output before this message. This +can help us improve future automake versions. + +END + if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then + echo 'Configuration will proceed anyway, since you have set the' >&2 + echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 + echo >&2 + else + cat >&2 <<'END' +Aborting the configuration process, to ensure you take notice of the issue. + +You can download and install GNU coreutils to get an 'rm' implementation +that behaves properly: <https://www.gnu.org/software/coreutils/>. + +If you want to complete the configuration process using your problematic +'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM +to "yes", and re-run configure. + +END + as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5 + fi +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable maintainer-specific portions of Makefiles" >&5 +printf %s "checking whether to enable maintainer-specific portions of Makefiles... " >&6; } + # Check whether --enable-maintainer-mode was given. +if test ${enable_maintainer_mode+y} +then : + enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval +else $as_nop + USE_MAINTAINER_MODE=no +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" >&5 +printf "%s\n" "$USE_MAINTAINER_MODE" >&6; } + if test $USE_MAINTAINER_MODE = yes; then + MAINTAINER_MODE_TRUE= + MAINTAINER_MODE_FALSE='#' +else + MAINTAINER_MODE_TRUE='#' + MAINTAINER_MODE_FALSE= +fi + + MAINT=$MAINTAINER_MODE_TRUE + + +# Check whether --enable-silent-rules was given.
+if test ${enable_silent_rules+y} +then : + enableval=$enable_silent_rules; +fi + +case $enable_silent_rules in # ((( + yes) AM_DEFAULT_VERBOSITY=0;; + no) AM_DEFAULT_VERBOSITY=1;; + *) AM_DEFAULT_VERBOSITY=0;; +esac +am_make=${MAKE-make} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5 +printf %s "checking whether $am_make supports nested variables... " >&6; } +if test ${am_cv_make_support_nested_variables+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if printf "%s\n" 'TRUE=$(BAR$(V)) +BAR0=false +BAR1=true +V=1 +am__doit: + @$(TRUE) +.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then + am_cv_make_support_nested_variables=yes +else + am_cv_make_support_nested_variables=no +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5 +printf "%s\n" "$am_cv_make_support_nested_variables" >&6; } +if test $am_cv_make_support_nested_variables = yes; then + AM_V='$(V)' + AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' +else + AM_V=$AM_DEFAULT_VERBOSITY + AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY +fi +AM_BACKSLASH='\' + +ac_config_headers="$ac_config_headers src/config.h" + + +# This was added at the suggestion of libtoolize (03-Jan-10) + + +# The default CFLAGS in Autoconf are "-g -O2" for gcc and just "-g" for any +# other compiler. There doesn't seem to be a standard way of getting rid of the +# -g (which I don't think is needed for a production library). This fudge seems +# to achieve the necessary. First, we remember the externally set values of +# CFLAGS. Then call the AC_PROG_CC macro to find the compiler - if CFLAGS is +# not set, it will be set to Autoconf's defaults. Afterwards, if the original +# values were not set, remove the -g from the Autoconf defaults. 
+ +remember_set_CFLAGS="$CFLAGS" + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. 
+ # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}clang", so it can be a program name with args. +set dummy ${ac_tool_prefix}clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +printf "%s\n" "$CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "clang", so it can be a program name with args. +set dummy clang; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="clang" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +printf "%s\n" "$ac_ct_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +fi + + +test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion -version; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +printf %s "checking whether the C compiler works... " >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. 
+for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else $as_nop + ac_file='' +fi +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +printf %s "checking for C compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... 
" >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... 
" >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +printf "%s\n" "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C" >&5 +printf %s "checking whether the compiler supports GNU C... " >&6; } +if test ${ac_cv_c_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_compiler_gnu=yes +else $as_nop + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+y} +ac_save_CFLAGS=$CFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +printf %s "checking whether $CC accepts -g... " >&6; } +if test ${ac_cv_prog_cc_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +else $as_nop + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +printf "%s\n" "$ac_cv_prog_cc_g" >&6; } +if test $ac_test_CFLAGS; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +ac_prog_cc_stdc=no +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C11 features" >&5 +printf %s "checking for $CC option to enable C11 features... " >&6; } +if test ${ac_cv_prog_cc_c11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c11=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c11_program +_ACEOF +for ac_arg in '' -std=gnu11 +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c11=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c11" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5 +printf "%s\n" "$ac_cv_prog_cc_c11" >&6; } + CC="$CC $ac_cv_prog_cc_c11" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11 + ac_prog_cc_stdc=c11 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C99 features" >&5 +printf %s "checking for $CC option to enable C99 features... " >&6; } +if test ${ac_cv_prog_cc_c99+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c99=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c99_program +_ACEOF +for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 -AC99 -D_STDC_C99= +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c99=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c99" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c99" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c99" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5 +printf "%s\n" "$ac_cv_prog_cc_c99" >&6; } + CC="$CC $ac_cv_prog_cc_c99" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 + ac_prog_cc_stdc=c99 +fi +fi +if test x$ac_prog_cc_stdc = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to enable C89 features" >&5 +printf %s "checking for $CC option to enable C89 features... " >&6; } +if test ${ac_cv_prog_cc_c89+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_c_conftest_c89_program +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC +fi + +if test "x$ac_cv_prog_cc_c89" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cc_c89" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +printf "%s\n" "$ac_cv_prog_cc_c89" >&6; } + CC="$CC $ac_cv_prog_cc_c89" +fi + ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 + ac_prog_cc_stdc=c89 +fi +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5 +printf %s "checking whether $CC understands -c and -o together... " >&6; } +if test ${am_cv_prog_cc_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF + # Make sure it works both with $CC and with simple cc. 
+ # Following AC_PROG_CC_C_O, we do the test twice because some + # compilers refuse to overwrite an existing .o file with -o, + # though they will create one. + am_cv_prog_cc_c_o=yes + for am_i in 1 2; do + if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5 + ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } \ + && test -f conftest2.$ac_objext; then + : OK + else + am_cv_prog_cc_c_o=no + break + fi + done + rm -f core conftest* + unset am_i +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5 +printf "%s\n" "$am_cv_prog_cc_c_o" >&6; } +if test "$am_cv_prog_cc_c_o" != yes; then + # Losing compiler, so override with the script. + # FIXME: It is wrong to rewrite CC. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__CC in this case, + # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" + CC="$am_aux_dir/compile $CC" +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +DEPDIR="${am__leading_dot}deps" + +ac_config_commands="$ac_config_commands depfiles" + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5 +printf %s "checking whether ${MAKE-make} supports the include directive... " >&6; } +cat > confinc.mk << 'END' +am__doit: + @echo this is the am__doit target >confinc.out +.PHONY: am__doit +END +am__include="#" +am__quote= +# BSD make does it like this. +echo '.include "confinc.mk" # ignored' > confmf.BSD +# Other make implementations (GNU, Solaris 10, AIX) do it like this. 
+echo 'include confinc.mk # ignored' > confmf.GNU +_am_result=no +for s in GNU BSD; do + { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5 + (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + case $?:`cat confinc.out 2>/dev/null` in #( + '0:this is the am__doit target') : + case $s in #( + BSD) : + am__include='.include' am__quote='"' ;; #( + *) : + am__include='include' am__quote='' ;; +esac ;; #( + *) : + ;; +esac + if test "$am__include" != "#"; then + _am_result="yes ($s style)" + break + fi +done +rm -f confinc.* confmf.* +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5 +printf "%s\n" "${_am_result}" >&6; } + +# Check whether --enable-dependency-tracking was given. +if test ${enable_dependency_tracking+y} +then : + enableval=$enable_dependency_tracking; +fi + +if test "x$enable_dependency_tracking" != xno; then + am_depcomp="$ac_aux_dir/depcomp" + AMDEPBACKSLASH='\' + am__nodep='_no' +fi + if test "x$enable_dependency_tracking" != xno; then + AMDEP_TRUE= + AMDEP_FALSE='#' +else + AMDEP_TRUE='#' + AMDEP_FALSE= +fi + + + +depcc="$CC" am_compiler_list= + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 +printf %s "checking dependency style of $depcc... " >&6; } +if test ${am_cv_CC_dependencies_compiler_type+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then + # We make a subdir and do the tests there. Otherwise we can end up + # making bogus files that we don't know about and never remove. For + # instance it was reported that on HP-UX the gcc test will end up + # making a dummy file named 'D' -- because '-MD' means "put the output + # in D". + rm -rf conftest.dir + mkdir conftest.dir + # Copy depcomp to subdir because otherwise we won't find it if we're + # using a relative directory. 
+ cp "$am_depcomp" conftest.dir + cd conftest.dir + # We will build objects and dependencies in a subdirectory because + # it helps to detect inapplicable dependency modes. For instance + # both Tru64's cc and ICC support -MD to output dependencies as a + # side effect of compilation, but ICC will put the dependencies in + # the current directory while Tru64 will put them in the object + # directory. + mkdir sub + + am_cv_CC_dependencies_compiler_type=none + if test "$am_compiler_list" = ""; then + am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` + fi + am__universal=false + case " $depcc " in #( + *\ -arch\ *\ -arch\ *) am__universal=true ;; + esac + + for depmode in $am_compiler_list; do + # Setup a source with many dependencies, because some compilers + # like to wrap large dependency lists on column 80 (with \), and + # we should not choose a depcomp mode which is confused by this. + # + # We need to recreate these files for each test, as the compiler may + # overwrite some of them when testing with obscure command lines. + # This happens at least with the AIX C compiler. + : > sub/conftest.c + for i in 1 2 3 4 5 6; do + echo '#include "conftst'$i'.h"' >> sub/conftest.c + # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with + # Solaris 10 /bin/sh. + echo '/* dummy */' > sub/conftst$i.h + done + echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf + + # We check with '-c' and '-o' for the sake of the "dashmstdout" + # mode. It turns out that the SunPro C++ compiler does not properly + # handle '-M -o', and we need to detect this. Also, some Intel + # versions had trouble with output in subdirs. + am__obj=sub/conftest.${OBJEXT-o} + am__minus_obj="-o $am__obj" + case $depmode in + gcc) + # This depmode causes a compiler race in universal mode. + test "$am__universal" = false || continue + ;; + nosideeffect) + # After this tag, mechanisms are not by side-effect, so they'll + # only be used when explicitly requested. 
+ if test "x$enable_dependency_tracking" = xyes; then + continue + else + break + fi + ;; + msvc7 | msvc7msys | msvisualcpp | msvcmsys) + # This compiler won't grok '-c -o', but also, the minuso test has + # not run yet. These depmodes are late enough in the game, and + # so weak that their functioning should not be impacted. + am__obj=conftest.${OBJEXT-o} + am__minus_obj= + ;; + none) break ;; + esac + if depmode=$depmode \ + source=sub/conftest.c object=$am__obj \ + depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ + $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ + >/dev/null 2>conftest.err && + grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && + grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && + grep $am__obj sub/conftest.Po > /dev/null 2>&1 && + ${MAKE-make} -s -f confmf > /dev/null 2>&1; then + # icc doesn't choke on unknown options, it will just issue warnings + # or remarks (even with -Werror). So we grep stderr for any message + # that says an option was ignored or not supported. + # When given -MP, icc 7.0 and 7.1 complain thusly: + # icc: Command line warning: ignoring option '-M'; no argument required + # The diagnosis changed in icc 8.0: + # icc: Command line remark: option '-MP' not supported + if (grep 'ignoring option' conftest.err || + grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else + am_cv_CC_dependencies_compiler_type=$depmode + break + fi + fi + done + + cd .. 
+ rm -rf conftest.dir +else + am_cv_CC_dependencies_compiler_type=none +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 +printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; } +CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type + + if + test "x$enable_dependency_tracking" != xno \ + && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then + am__fastdepCC_TRUE= + am__fastdepCC_FALSE='#' +else + am__fastdepCC_TRUE='#' + am__fastdepCC_FALSE= +fi + + + + +ac_header= ac_cache= +for ac_item in $ac_header_c_list +do + if test $ac_cache; then + ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache "$ac_includes_default" + if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then + printf "%s\n" "#define $ac_item 1" >> confdefs.h + fi + ac_header= ac_cache= + elif test $ac_header; then + ac_cache=$ac_item + else + ac_header=$ac_item + fi +done + + + + + + + + +if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes +then : + +printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether it is safe to define __EXTENSIONS__" >&5 +printf %s "checking whether it is safe to define __EXTENSIONS__... " >&6; } +if test ${ac_cv_safe_to_define___extensions__+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +# define __EXTENSIONS__ 1 + $ac_includes_default +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_safe_to_define___extensions__=yes +else $as_nop + ac_cv_safe_to_define___extensions__=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_safe_to_define___extensions__" >&5 +printf "%s\n" "$ac_cv_safe_to_define___extensions__" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether _XOPEN_SOURCE should be defined" >&5 +printf %s "checking whether _XOPEN_SOURCE should be defined... " >&6; } +if test ${ac_cv_should_define__xopen_source+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_should_define__xopen_source=no + if test $ac_cv_header_wchar_h = yes +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + mbstate_t x; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #define _XOPEN_SOURCE 500 + #include + mbstate_t x; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_should_define__xopen_source=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_should_define__xopen_source" >&5 +printf "%s\n" "$ac_cv_should_define__xopen_source" >&6; } + + printf "%s\n" "#define _ALL_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _DARWIN_C_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _GNU_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _HPUX_ALT_XOPEN_SOCKET_API 1" >>confdefs.h + + printf "%s\n" "#define _NETBSD_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _OPENBSD_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_BFP_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_DFP_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_LIB_EXT2__ 1" >>confdefs.h + + printf "%s\n" "#define __STDC_WANT_MATH_SPEC_FUNCS__ 1" >>confdefs.h + + printf "%s\n" "#define _TANDEM_SOURCE 1" >>confdefs.h + + if test $ac_cv_header_minix_config_h = yes +then : + MINIX=yes + printf "%s\n" "#define _MINIX 1" >>confdefs.h + + printf "%s\n" "#define _POSIX_SOURCE 1" >>confdefs.h + + printf "%s\n" "#define _POSIX_1_SOURCE 2" >>confdefs.h + +else $as_nop + MINIX= +fi + if test $ac_cv_safe_to_define___extensions__ = yes +then : + printf "%s\n" "#define __EXTENSIONS__ 1" >>confdefs.h + +fi + if test $ac_cv_should_define__xopen_source = yes +then : + printf "%s\n" "#define _XOPEN_SOURCE 
500" >>confdefs.h + +fi + + +if test "x$remember_set_CFLAGS" = "x" +then + if test "$CFLAGS" = "-g -O2" + then + CFLAGS="-O2" + elif test "$CFLAGS" = "-g" + then + CFLAGS="" + fi +fi + +# This is a new thing required to stop a warning from automake 1.12 + + if test -n "$ac_tool_prefix"; then + for ac_prog in ar lib "link -lib" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +printf "%s\n" "$AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar lib "link -lib" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +printf "%s\n" "$ac_ct_AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the archiver ($AR) interface" >&5 +printf %s "checking the archiver ($AR) interface... " >&6; } +if test ${am_cv_ar_interface+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + am_cv_ar_interface=ar + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int some_variable = 0; +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + am_ar_try='$AR cru libconftest.a conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=ar + else + am_ar_try='$AR -NOLOGO -OUT:conftest.lib conftest.$ac_objext >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$am_ar_try\""; } >&5 + (eval $am_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test "$ac_status" -eq 0; then + am_cv_ar_interface=lib + else + am_cv_ar_interface=unknown + fi + fi + rm -f conftest.lib libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_ar_interface" >&5 +printf "%s\n" "$am_cv_ar_interface" >&6; } + +case $am_cv_ar_interface in +ar) + ;; +lib) + # Microsoft lib, so override with the ar-lib wrapper script. + # FIXME: It is wrong to rewrite AR. + # But if we don't then we get into trouble of one sort or another. + # A longer-term fix would be to have automake use am__AR in this case, + # and then we could set am__AR="$am_aux_dir/ar-lib \$(AR)" or something + # similar. + AR="$am_aux_dir/ar-lib $AR" + ;; +unknown) + as_fn_error $? 
"could not determine $AR interface" "$LINENO" 5 + ;; +esac + + +# Check for a 64-bit integer type +ac_fn_c_find_intX_t "$LINENO" "64" "ac_cv_c_int64_t" +case $ac_cv_c_int64_t in #( + no|yes) ;; #( + *) + +printf "%s\n" "#define int64_t $ac_cv_c_int64_t" >>confdefs.h +;; +esac + + + +case `pwd` in + *\ * | *\ *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5 +printf "%s\n" "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;; +esac + + + +macro_version='2.4.7' +macro_revision='2.4.7' + + + + + + + + + + + + + + +ltmain=$ac_aux_dir/ltmain.sh + + + + # Make sure we can run config.sub. +$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 || + as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5 + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5 +printf %s "checking build system type... " >&6; } +if test ${ac_cv_build+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"` +test "x$ac_build_alias" = x && + as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5 +ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" "$LINENO" 5 + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5 +printf "%s\n" "$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) as_fn_error $? 
"invalid value of canonical build" "$LINENO" 5;; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5 +printf %s "checking host system type... " >&6; } +if test ${ac_cv_host+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` || + as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" "$LINENO" 5 +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5 +printf "%s\n" "$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\(["`$\\]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\(["`\\]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5 +printf %s "checking how to print strings... " >&6; } +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "" +} + +case $ECHO in + printf*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: printf" >&5 +printf "%s\n" "printf" >&6; } ;; + print*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 +printf "%s\n" "print -r" >&6; } ;; + *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: cat" >&5 +printf "%s\n" "cat" >&6; } ;; +esac + + + + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 +printf %s "checking for a sed that does not truncate output... 
" >&6; } +if test ${ac_cv_path_SED+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ + for ac_i in 1 2 3 4 5 6 7; do + ac_script="$ac_script$as_nl$ac_script" + done + echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed + { ac_script=; unset ac_script;} + if test -z "$SED"; then + ac_path_SED_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in sed gsed + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_SED="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_SED" || continue +# Check for GNU ac_path_SED and select it if it is found. + # Check for GNU $ac_path_SED +case `"$ac_path_SED" --version 2>&1` in +*GNU*) + ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" '' >> "conftest.nl" + "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_SED_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_SED="$ac_path_SED" + ac_path_SED_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_SED_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_SED"; then + as_fn_error $? 
"no acceptable sed could be found in \$PATH" "$LINENO" 5 + fi +else + ac_cv_path_SED=$SED +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 +printf "%s\n" "$ac_cv_path_SED" >&6; } + SED="$ac_cv_path_SED" + rm -f conftest.sed + +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +printf %s "checking for grep that handles long lines and -e... " >&6; } +if test ${ac_cv_path_GREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in grep ggrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +printf "%s\n" "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +printf %s "checking for egrep... 
" >&6; } +if test ${ac_cv_path_EGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in egrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? 
"no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +printf "%s\n" "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 +printf %s "checking for fgrep... " >&6; } +if test ${ac_cv_path_FGREP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 + then ac_cv_path_FGREP="$GREP -F" + else + if test -z "$FGREP"; then + ac_path_FGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in fgrep + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_FGREP="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_FGREP" || continue +# Check for GNU ac_path_FGREP and select it if it is found. 
+ # Check for GNU $ac_path_FGREP +case `"$ac_path_FGREP" --version 2>&1` in +*GNU*) + ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; +*) + ac_count=0 + printf %s 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + printf "%s\n" 'FGREP' >> "conftest.nl" + "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_FGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_FGREP="$ac_path_FGREP" + ac_path_FGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_FGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_FGREP"; then + as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_FGREP=$FGREP +fi + + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 +printf "%s\n" "$ac_cv_path_FGREP" >&6; } + FGREP="$ac_cv_path_FGREP" + + +test -z "$GREP" && GREP=grep + + + + + + + + + + + + + + + + + + + +# Check whether --with-gnu-ld was given. +if test ${with_gnu_ld+y} +then : + withval=$with_gnu_ld; test no = "$withval" || with_gnu_ld=yes +else $as_nop + with_gnu_ld=no +fi + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 +printf %s "checking for ld used by $CC... 
" >&6; } + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [\\/]* | ?:[\\/]*) + re_direlt='/[^/][^/]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 +printf %s "checking for GNU ld... " >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5 +printf %s "checking for non-GNU ld... " >&6; } +fi +if test ${lt_cv_path_LD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. + case `"$lt_cv_path_LD" -v 2>&1 &5 +printf "%s\n" "$LD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5 +printf %s "checking if the linker ($LD) is GNU ld... 
" >&6; } +if test ${lt_cv_prog_gnu_ld+y} +then : + printf %s "(cached) " >&6 +else $as_nop + # I'd rather use --version here, but apparently some GNU lds only accept -v. +case `$LD -v 2>&1 &5 +printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; } +with_gnu_ld=$lt_cv_prog_gnu_ld + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5 +printf %s "checking for BSD- or MS-compatible name lister (nm)... " >&6; } +if test ${lt_cv_path_NM+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 +printf "%s\n" "$lt_cv_path_NM" >&6; } +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. + else + if test -n "$ac_tool_prefix"; then + for ac_prog in dumpbin "link -dump" + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DUMPBIN+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DUMPBIN"; then + ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DUMPBIN=$ac_cv_prog_DUMPBIN +if test -n "$DUMPBIN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 +printf "%s\n" "$DUMPBIN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$DUMPBIN" && break + done +fi +if test -z "$DUMPBIN"; then + ac_ct_DUMPBIN=$DUMPBIN + for ac_prog in dumpbin "link -dump" +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DUMPBIN+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DUMPBIN"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN +if test -n "$ac_ct_DUMPBIN"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 +printf "%s\n" "$ac_ct_DUMPBIN" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_DUMPBIN" && break +done + + if test "x$ac_ct_DUMPBIN" = x; then + DUMPBIN=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DUMPBIN=$ac_ct_DUMPBIN + fi +fi + + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 +printf %s "checking the name lister ($NM) interface... 
" >&6; } +if test ${lt_cv_nm_interface+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&5 + (eval echo "\"\$as_me:$LINENO: output\"" >&5) + cat conftest.out >&5 + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest* +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 +printf "%s\n" "$lt_cv_nm_interface" >&6; } + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +printf %s "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +printf "%s\n" "no, using $LN_S" >&6; } +fi + +# find the maximum length of command line arguments +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 +printf %s "checking the maximum length of command line arguments... " >&6; } +if test ${lt_cv_sys_max_cmd_len+y} +then : + printf %s "(cached) " >&6 +else $as_nop + i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. 
+ lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. 
It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. + lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[ ]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac + +fi + +if test -n "$lt_cv_sys_max_cmd_len"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 +printf "%s\n" "$lt_cv_sys_max_cmd_len" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none" >&5 +printf "%s\n" "none" >&6; } +fi +max_cmd_len=$lt_cv_sys_max_cmd_len + + + + + + +: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} + +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi + + + + + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 +printf %s "checking how to convert $build file names to $host format... 
" >&6; } +if test ${lt_cv_to_host_file_cmd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac + +fi + +to_host_file_cmd=$lt_cv_to_host_file_cmd +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 +printf "%s\n" "$lt_cv_to_host_file_cmd" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 +printf %s "checking how to convert $build file names to toolchain format... " >&6; } +if test ${lt_cv_to_tool_file_cmd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + #assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac + +fi + +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 +printf "%s\n" "$lt_cv_to_tool_file_cmd" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 +printf %s "checking for $LD option to reload object files... 
" >&6; } +if test ${lt_cv_ld_reload_flag+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ld_reload_flag='-r' +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 +printf "%s\n" "$lt_cv_ld_reload_flag" >&6; } +reload_flag=$lt_cv_ld_reload_flag +case $reload_flag in +"" | " "*) ;; +*) reload_flag=" $reload_flag" ;; +esac +reload_cmds='$LD$reload_flag -o $output$reload_objs' +case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + if test yes != "$GCC"; then + reload_cmds=false + fi + ;; + darwin*) + if test yes = "$GCC"; then + reload_cmds='$LTCC $LTCFLAGS -nostdlib $wl-r -o $output$reload_objs' + else + reload_cmds='$LD$reload_flag -o $output$reload_objs' + fi + ;; +esac + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}file", so it can be a program name with args. +set dummy ${ac_tool_prefix}file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$FILECMD"; then + ac_cv_prog_FILECMD="$FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_FILECMD="${ac_tool_prefix}file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +FILECMD=$ac_cv_prog_FILECMD +if test -n "$FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $FILECMD" >&5 +printf "%s\n" "$FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_FILECMD"; then + ac_ct_FILECMD=$FILECMD + # Extract the first word of "file", so it can be a program name with args. +set dummy file; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_FILECMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_FILECMD"; then + ac_cv_prog_ac_ct_FILECMD="$ac_ct_FILECMD" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_FILECMD="file" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_FILECMD=$ac_cv_prog_ac_ct_FILECMD +if test -n "$ac_ct_FILECMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_FILECMD" >&5 +printf "%s\n" "$ac_ct_FILECMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_FILECMD" = x; then + FILECMD=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + FILECMD=$ac_ct_FILECMD + fi +else + FILECMD="$ac_cv_prog_FILECMD" +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +printf "%s\n" "$OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +printf "%s\n" "$ac_ct_OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 +printf %s "checking how to recognize dependent libraries... " >&6; } +if test ${lt_cv_deplibs_check_method+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. 
+# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[4-9]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[45]*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='$FILECMD -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly* | midnightbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=$FILECMD + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[3-9]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd* | bitrig*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all + ;; +os2*) + lt_cv_deplibs_check_method=pass_all 
+ ;; +esac + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 +printf "%s\n" "$lt_cv_deplibs_check_method" >&6; } + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + + + + + + + + + + + + + + + + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +printf "%s\n" "$DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +printf "%s\n" "$ac_ct_DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 +printf %s "checking how to associate runtime and link libraries... 
" >&6; } +if test ${lt_cv_sharedlib_from_linklib_cmd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_sharedlib_from_linklib_cmd='unknown' + +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 +printf "%s\n" "$lt_cv_sharedlib_from_linklib_cmd" >&6; } +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + + + + + + + +if test -n "$ac_tool_prefix"; then + for ac_prog in ar + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AR="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 +printf "%s\n" "$AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$AR" && break + done +fi +if test -z "$AR"; then + ac_ct_AR=$AR + for ac_prog in ar +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AR+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AR="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 +printf "%s\n" "$ac_ct_AR" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$ac_ct_AR" && break +done + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +fi + +: ${AR=ar} + + + + + + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because thats what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS + + + + + + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 +printf %s "checking for archiver @FILE support... 
" >&6; } +if test ${lt_cv_ar_at_file+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ar_at_file=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. + rm -f conftest.$ac_objext libconftest.a + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 + (eval $lt_ar_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 +printf "%s\n" "$lt_cv_ar_at_file" >&6; } + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi + + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. +set dummy ${ac_tool_prefix}strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$STRIP"; then + ac_cv_prog_STRIP="$STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_STRIP="${ac_tool_prefix}strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +STRIP=$ac_cv_prog_STRIP +if test -n "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 +printf "%s\n" "$STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_STRIP"; then + ac_ct_STRIP=$STRIP + # Extract the first word of "strip", so it can be a program name with args. +set dummy strip; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_STRIP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_STRIP"; then + ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_STRIP="strip" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP +if test -n "$ac_ct_STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 +printf "%s\n" "$ac_ct_STRIP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_STRIP" = x; then + STRIP=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + STRIP=$ac_ct_STRIP + fi +else + STRIP="$ac_cv_prog_STRIP" +fi + +test -z "$STRIP" && STRIP=: + + + + + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_RANLIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 +printf "%s\n" "$RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_RANLIB+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 +printf "%s\n" "$ac_ct_RANLIB" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +test -z "$RANLIB" && RANLIB=: + + + + + + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. 
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + + +# Check for command to grab the raw symbol name followed by C symbol from nm. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 +printf %s "checking command to parse $NM output from $compiler object... " >&6; } +if test ${lt_cv_sys_global_symbol_pipe+y} +then : + printf %s "(cached) " >&6 +else $as_nop + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[ABCDGISTW]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[ABCDEGRST]' + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. 
+ lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. + lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. 
+ # Also find C++ and __fastcall symbols from MSVC++ or ICC, + # which start with @ or ?. + lt_cv_sys_global_symbol_pipe="$AWK '"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx" + else + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + # Now try to grab the symbols. + nlist=conftest.nm + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&5 + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&5 && test -s "$nlist"; then + # Try sorting and uniquifying the output. 
+ if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: failed" >&5 +printf "%s\n" "failed" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ok" >&5 +printf "%s\n" "ok" >&6; } +fi + +# Response file support. +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 +printf %s "checking for sysroot... " >&6; } + +# Check whether --with-sysroot was given. +if test ${with_sysroot+y} +then : + withval=$with_sysroot; +else $as_nop + with_sysroot=no +fi + + +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_sysroot" >&5 +printf "%s\n" "$with_sysroot" >&6; } + as_fn_error $? "The sysroot must be an absolute path." 
"$LINENO" 5 + ;; +esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 +printf "%s\n" "${lt_sysroot:-no}" >&6; } + + + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a working dd" >&5 +printf %s "checking for a working dd... " >&6; } +if test ${ac_cv_path_lt_DD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +if test -z "$lt_DD"; then + ac_path_lt_DD_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_prog in dd + do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_lt_DD="$as_dir$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_lt_DD" || continue +if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi + $ac_path_lt_DD_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_lt_DD"; then + : + fi +else + ac_cv_path_lt_DD=$lt_DD +fi + +rm -f conftest.i conftest2.i conftest.out +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_lt_DD" >&5 +printf "%s\n" "$ac_cv_path_lt_DD" >&6; } + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to truncate binary pipes" >&5 +printf %s "checking how to truncate binary pipes... 
" >&6; } +if test ${lt_cv_truncate_bin+y} +then : + printf %s "(cached) " >&6 +else $as_nop + printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +lt_cv_truncate_bin= +if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" +fi +rm -f conftest.i conftest2.i conftest.out +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q" +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_truncate_bin" >&5 +printf "%s\n" "$lt_cv_truncate_bin" >&6; } + + + + + + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + +# Check whether --enable-libtool-lock was given. +if test ${enable_libtool_lock+y} +then : + enableval=$enable_libtool_lock; +fi + +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '#line '$LINENO' "configure"' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + emul=elf + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `$FILECMD conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 +printf %s "checking whether the C compiler needs -belf... " >&6; } +if test ${lt_cv_cc_needs_belf+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_cc_needs_belf=yes +else $as_nop + lt_cv_cc_needs_belf=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 +printf "%s\n" "$lt_cv_cc_needs_belf" >&6; } + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + case `$FILECMD conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. + if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. +set dummy ${ac_tool_prefix}mt; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_MANIFEST_TOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$MANIFEST_TOOL"; then + ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL +if test -n "$MANIFEST_TOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 +printf "%s\n" "$MANIFEST_TOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_MANIFEST_TOOL"; then + ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL + # Extract the first word of "mt", so it can be a program name with args. +set dummy mt; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_MANIFEST_TOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_MANIFEST_TOOL"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL +if test -n "$ac_ct_MANIFEST_TOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 +printf "%s\n" "$ac_ct_MANIFEST_TOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_MANIFEST_TOOL" = x; then + MANIFEST_TOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL + fi +else + MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" +fi + +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 +printf %s "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } +if test ${lt_cv_path_mainfest_tool+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_path_mainfest_tool=no + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 + $MANIFEST_TOOL '-?' 
2>conftest.err > conftest.out + cat conftest.err >&5 + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest* +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 +printf "%s\n" "$lt_cv_path_mainfest_tool" >&6; } +if test yes != "$lt_cv_path_mainfest_tool"; then + MANIFEST_TOOL=: +fi + + + + + + + case $host_os in + rhapsody* | darwin*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. +set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DSYMUTIL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DSYMUTIL"; then + ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DSYMUTIL=$ac_cv_prog_DSYMUTIL +if test -n "$DSYMUTIL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 +printf "%s\n" "$DSYMUTIL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DSYMUTIL"; then + ac_ct_DSYMUTIL=$DSYMUTIL + # Extract the first word of "dsymutil", so it can be a program name with args. +set dummy dsymutil; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_prog_ac_ct_DSYMUTIL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DSYMUTIL"; then + ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL +if test -n "$ac_ct_DSYMUTIL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 +printf "%s\n" "$ac_ct_DSYMUTIL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DSYMUTIL" = x; then + DSYMUTIL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DSYMUTIL=$ac_ct_DSYMUTIL + fi +else + DSYMUTIL="$ac_cv_prog_DSYMUTIL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. +set dummy ${ac_tool_prefix}nmedit; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_NMEDIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$NMEDIT"; then + ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +NMEDIT=$ac_cv_prog_NMEDIT +if test -n "$NMEDIT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 +printf "%s\n" "$NMEDIT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_NMEDIT"; then + ac_ct_NMEDIT=$NMEDIT + # Extract the first word of "nmedit", so it can be a program name with args. +set dummy nmedit; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_NMEDIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_NMEDIT"; then + ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_NMEDIT="nmedit" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT +if test -n "$ac_ct_NMEDIT"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 +printf "%s\n" "$ac_ct_NMEDIT" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_NMEDIT" = x; then + NMEDIT=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + NMEDIT=$ac_ct_NMEDIT + fi +else + NMEDIT="$ac_cv_prog_NMEDIT" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. +set dummy ${ac_tool_prefix}lipo; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_LIPO+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$LIPO"; then + ac_cv_prog_LIPO="$LIPO" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_LIPO="${ac_tool_prefix}lipo" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +LIPO=$ac_cv_prog_LIPO +if test -n "$LIPO"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 +printf "%s\n" "$LIPO" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_LIPO"; then + ac_ct_LIPO=$LIPO + # Extract the first word of "lipo", so it can be a program name with args. +set dummy lipo; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_LIPO+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_LIPO"; then + ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_LIPO="lipo" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO +if test -n "$ac_ct_LIPO"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 +printf "%s\n" "$ac_ct_LIPO" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_LIPO" = x; then + LIPO=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + LIPO=$ac_ct_LIPO + fi +else + LIPO="$ac_cv_prog_LIPO" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OTOOL"; then + ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL="${ac_tool_prefix}otool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL=$ac_cv_prog_OTOOL +if test -n "$OTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 +printf "%s\n" "$OTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL"; then + ac_ct_OTOOL=$OTOOL + # Extract the first word of "otool", so it can be a program name with args. +set dummy otool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OTOOL"; then + ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL="otool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL +if test -n "$ac_ct_OTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 +printf "%s\n" "$ac_ct_OTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OTOOL" = x; then + OTOOL=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL=$ac_ct_OTOOL + fi +else + OTOOL="$ac_cv_prog_OTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. +set dummy ${ac_tool_prefix}otool64; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OTOOL64+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OTOOL64"; then + ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OTOOL64=$ac_cv_prog_OTOOL64 +if test -n "$OTOOL64"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 +printf "%s\n" "$OTOOL64" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OTOOL64"; then + ac_ct_OTOOL64=$OTOOL64 + # Extract the first word of "otool64", so it can be a program name with args. +set dummy otool64; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OTOOL64+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OTOOL64"; then + ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OTOOL64="otool64" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 +if test -n "$ac_ct_OTOOL64"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 +printf "%s\n" "$ac_ct_OTOOL64" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OTOOL64" = x; then + OTOOL64=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OTOOL64=$ac_ct_OTOOL64 + fi +else + OTOOL64="$ac_cv_prog_OTOOL64" +fi + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 +printf %s "checking for -single_module linker flag... " >&6; } +if test ${lt_cv_apple_cc_single_mod+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&5 + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&5 + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 +printf "%s\n" "$lt_cv_apple_cc_single_mod" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 +printf %s "checking for -exported_symbols_list linker flag... " >&6; } +if test ${lt_cv_ld_exported_symbols_list+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_ld_exported_symbols_list=yes +else $as_nop + lt_cv_ld_exported_symbols_list=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 +printf "%s\n" "$lt_cv_ld_exported_symbols_list" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 +printf %s "checking for -force_load linker flag... " >&6; } +if test ${lt_cv_ld_force_load+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&5 + $AR $AR_FLAGS libconftest.a conftest.o 2>&5 + echo "$RANLIB libconftest.a" >&5 + $RANLIB libconftest.a 2>&5 + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 + $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? 
+ if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&5 + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&5 + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 +printf "%s\n" "$lt_cv_ld_force_load" >&6; } + case $host_os in + rhapsody* | darwin1.[012]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[012],*|,*powerpc*-darwin[5-8]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + 
case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + +ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default +" +if test "x$ac_cv_header_dlfcn_h" = xyes +then : + printf "%s\n" "#define HAVE_DLFCN_H 1" >>confdefs.h + +fi + + + + + +# Set options +enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}as", so it can be a program name with args. +set dummy ${ac_tool_prefix}as; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_AS+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$AS"; then + ac_cv_prog_AS="$AS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_AS="${ac_tool_prefix}as" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +AS=$ac_cv_prog_AS +if test -n "$AS"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AS" >&5 +printf "%s\n" "$AS" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_AS"; then + ac_ct_AS=$AS + # Extract the first word of "as", so it can be a program name with args. 
+set dummy as; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_AS+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_AS"; then + ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_AS="as" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_AS=$ac_cv_prog_ac_ct_AS +if test -n "$ac_ct_AS"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5 +printf "%s\n" "$ac_ct_AS" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_AS" = x; then + AS="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + AS=$ac_ct_AS + fi +else + AS="$ac_cv_prog_AS" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. +set dummy ${ac_tool_prefix}dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$DLLTOOL"; then + ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +DLLTOOL=$ac_cv_prog_DLLTOOL +if test -n "$DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 +printf "%s\n" "$DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_DLLTOOL"; then + ac_ct_DLLTOOL=$DLLTOOL + # Extract the first word of "dlltool", so it can be a program name with args. +set dummy dlltool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_DLLTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_DLLTOOL"; then + ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_DLLTOOL="dlltool" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL +if test -n "$ac_ct_DLLTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 +printf "%s\n" "$ac_ct_DLLTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_DLLTOOL" = x; then + DLLTOOL="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + DLLTOOL=$ac_ct_DLLTOOL + fi +else + DLLTOOL="$ac_cv_prog_DLLTOOL" +fi + + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. +set dummy ${ac_tool_prefix}objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$OBJDUMP"; then + ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +OBJDUMP=$ac_cv_prog_OBJDUMP +if test -n "$OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 +printf "%s\n" "$OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_OBJDUMP"; then + ac_ct_OBJDUMP=$OBJDUMP + # Extract the first word of "objdump", so it can be a program name with args. +set dummy objdump; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_OBJDUMP+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_OBJDUMP"; then + ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_OBJDUMP="objdump" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP +if test -n "$ac_ct_OBJDUMP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 +printf "%s\n" "$ac_ct_OBJDUMP" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_ct_OBJDUMP" = x; then + OBJDUMP="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + OBJDUMP=$ac_ct_OBJDUMP + fi +else + OBJDUMP="$ac_cv_prog_OBJDUMP" +fi + + ;; +esac + +test -z "$AS" && AS=as + + + + + +test -z "$DLLTOOL" && DLLTOOL=dlltool + + + + + +test -z "$OBJDUMP" && OBJDUMP=objdump + + + + + + + + enable_dlopen=no + + + + # Check whether --enable-shared was given. +if test ${enable_shared+y} +then : + enableval=$enable_shared; p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + enable_shared=yes +fi + + + + + + + + + + # Check whether --enable-static was given. 
+if test ${enable_static+y} +then : + enableval=$enable_static; p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + enable_static=yes +fi + + + + + + + + + + +# Check whether --with-pic was given. +if test ${with_pic+y} +then : + withval=$with_pic; lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + pic_mode=default +fi + + + + + + + + + # Check whether --enable-fast-install was given. +if test ${enable_fast_install+y} +then : + enableval=$enable_fast_install; p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac +else $as_nop + enable_fast_install=yes +fi + + + + + + + + + shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[5-9]*,yes) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which variant of shared library versioning to provide" >&5 +printf %s "checking which variant of shared library versioning to provide... " >&6; } + +# Check whether --with-aix-soname was given. 
+if test ${with_aix_soname+y} +then : + withval=$with_aix_soname; case $withval in + aix|svr4|both) + ;; + *) + as_fn_error $? "Unknown argument to --with-aix-soname" "$LINENO" 5 + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname +else $as_nop + if test ${lt_cv_with_aix_soname+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_with_aix_soname=aix +fi + + with_aix_soname=$lt_cv_with_aix_soname +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_aix_soname" >&5 +printf "%s\n" "$with_aix_soname" >&6; } + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + + + + + + + + + + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +test -z "$LN_S" && LN_S="ln -s" + + + + + + + + + + + + + + +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 +printf %s "checking for objdir... " >&6; } +if test ${lt_cv_objdir+y} +then : + printf %s "(cached) " >&6 +else $as_nop + rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 +printf "%s\n" "$lt_cv_objdir" >&6; } +objdir=$lt_cv_objdir + + + + + +printf "%s\n" "#define LT_OBJDIR \"$lt_cv_objdir/\"" >>confdefs.h + + + + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). +libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +func_cc_basename $compiler +cc_basename=$func_cc_basename_result + + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 +printf %s "checking for ${ac_tool_prefix}file... " >&6; } +if test ${lt_cv_path_MAGIC_CMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/${ac_tool_prefix}file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"${ac_tool_prefix}file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +printf "%s\n" "$MAGIC_CMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + + + +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for file" >&5 +printf %s "checking for file... " >&6; } +if test ${lt_cv_path_MAGIC_CMD+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $MAGIC_CMD in +[\\/*] | ?:[\\/]*) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/file"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"file" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac +fi + +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 +printf "%s\n" "$MAGIC_CMD" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + else + MAGIC_CMD=: + fi +fi + + fi + ;; +esac + +# Use C for the default configuration in the libtool script + +lt_save_CC=$CC +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. 
+objext=o +objext=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="int some_variable = 0;" + +# Code to be used in simple link tests +lt_simple_link_test_code='int main(){return(0);}' + + + + + + + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC + +# Save the default compiler, since it gets overwritten when the other +# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. +compiler_DEFAULT=$CC + +# save warnings/boilerplate of simple test code +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* + +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* + + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + +lt_prog_compiler_no_builtin_flag= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; + *) + lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; + esac + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 +printf %s "checking if $compiler supports -fno-rtti -fno-exceptions... 
" >&6; } +if test ${lt_cv_prog_compiler_rtti_exceptions+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_rtti_exceptions=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="-fno-rtti -fno-exceptions" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_rtti_exceptions=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 +printf "%s\n" "$lt_cv_prog_compiler_rtti_exceptions" >&6; } + +if test yes = "$lt_cv_prog_compiler_rtti_exceptions"; then + lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" +else + : +fi + +fi + + + + + + + lt_prog_compiler_wl= +lt_prog_compiler_pic= +lt_prog_compiler_static= + + + if test yes = "$GCC"; then + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_static='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + fi + lt_prog_compiler_pic='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + lt_prog_compiler_pic='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. 
+ # The "-static" flag exists, but is broken. + lt_prog_compiler_static= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + ;; + + interix[3-9]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + lt_prog_compiler_can_build_shared=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic=-Kconform_pic + fi + ;; + + *) + lt_prog_compiler_pic='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + lt_prog_compiler_wl='-Xlinker ' + if test -n "$lt_prog_compiler_pic"; then + lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. 
+ case $host_os in + aix*) + lt_prog_compiler_wl='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + lt_prog_compiler_static='-Bstatic' + else + lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + lt_prog_compiler_pic='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + lt_prog_compiler_pic='-DDLL_EXPORT' + case $host_os in + os2*) + lt_prog_compiler_static='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + lt_prog_compiler_wl='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + lt_prog_compiler_pic='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + lt_prog_compiler_static='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + lt_prog_compiler_wl='-Wl,' + # PIC (with -KPIC) is the default. + lt_prog_compiler_static='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-static' + ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ icc* | ifort*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + # Lahey Fortran 8.1. + lf95*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='--shared' + lt_prog_compiler_static='--static' + ;; + nagfor*) + # NAG Fortran compiler + lt_prog_compiler_wl='-Wl,-Wl,,' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + ccc*) + lt_prog_compiler_wl='-Wl,' + # All Alpha code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-qpic' + lt_prog_compiler_static='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='' + ;; + *Sun\ F* | *Sun*Fortran*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + lt_prog_compiler_wl='-Wl,' + ;; + *Intel*\ [CF]*Compiler*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fPIC' + lt_prog_compiler_static='-static' + ;; + *Portland\ Group*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-fpic' + lt_prog_compiler_static='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + lt_prog_compiler_pic='-KPIC' + 
lt_prog_compiler_static='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + lt_prog_compiler_pic='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + lt_prog_compiler_wl='-Wl,' + # All OSF/1 code is PIC. + lt_prog_compiler_static='-non_shared' + ;; + + rdos*) + lt_prog_compiler_static='-non_shared' + ;; + + solaris*) + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + lt_prog_compiler_wl='-Qoption ld ';; + *) + lt_prog_compiler_wl='-Wl,';; + esac + ;; + + sunos4*) + lt_prog_compiler_wl='-Qoption ld ' + lt_prog_compiler_pic='-PIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + lt_prog_compiler_pic='-Kconform_pic' + lt_prog_compiler_static='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_pic='-KPIC' + lt_prog_compiler_static='-Bstatic' + ;; + + unicos*) + lt_prog_compiler_wl='-Wl,' + lt_prog_compiler_can_build_shared=no + ;; + + uts4*) + lt_prog_compiler_pic='-pic' + lt_prog_compiler_static='-Bstatic' + ;; + + *) + lt_prog_compiler_can_build_shared=no + ;; + esac + fi + +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + lt_prog_compiler_pic= + ;; + *) + lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" + ;; +esac + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 +printf %s "checking for $compiler option to produce PIC... 
" >&6; } +if test ${lt_cv_prog_compiler_pic+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic=$lt_prog_compiler_pic +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic" >&6; } +lt_prog_compiler_pic=$lt_cv_prog_compiler_pic + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$lt_prog_compiler_pic"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 +printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; } +if test ${lt_cv_prog_compiler_pic_works+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_pic_works=no + ac_outfile=conftest.$ac_objext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$lt_prog_compiler_pic -DPIC" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_pic_works=yes + fi + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 +printf "%s\n" "$lt_cv_prog_compiler_pic_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_pic_works"; then + case $lt_prog_compiler_pic in + "" | " "*) ;; + *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; + esac +else + lt_prog_compiler_pic= + lt_prog_compiler_can_build_shared=no +fi + +fi + + + + + + + + + + + +# +# Check to make sure the static flag actually works. +# +wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 +printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } +if test ${lt_cv_prog_compiler_static_works+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_static_works=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $lt_tmp_static_flag" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler_static_works=yes + fi + else + lt_cv_prog_compiler_static_works=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 +printf "%s\n" "$lt_cv_prog_compiler_static_works" >&6; } + +if test yes = "$lt_cv_prog_compiler_static_works"; then + : +else + lt_prog_compiler_static= +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 +printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; } +if test ${lt_cv_prog_compiler_c_o+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler_c_o=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + lt_cv_prog_compiler_c_o=yes + fi + fi + chmod u+w . 2>&5 + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 +printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; } + + + + +hard_links=nottested +if test no = "$lt_cv_prog_compiler_c_o" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 +printf %s "checking if we can lock with hard links... 
" >&6; } + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 +printf "%s\n" "$hard_links" >&6; } + if test no = "$hard_links"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&5 +printf "%s\n" "$as_me: WARNING: '$CC' does not support '-c -o', so 'make -j' may be unsafe" >&2;} + need_locks=warn + fi +else + need_locks=no +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 +printf %s "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } + + runpath_var= + allow_undefined_flag= + always_export_symbols=no + archive_cmds= + archive_expsym_cmds= + compiler_needs_object=no + enable_shared_with_static_runtimes=no + export_dynamic_flag_spec= + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + hardcode_automatic=no + hardcode_direct=no + hardcode_direct_absolute=no + hardcode_libdir_flag_spec= + hardcode_libdir_separator= + hardcode_minus_L=no + hardcode_shlibpath_var=unsupported + inherit_rpath=no + link_all_deplibs=unknown + module_cmds= + module_expsym_cmds= + old_archive_from_new_cmds= + old_archive_from_expsyms_cmds= + thread_safe_flag_spec= + whole_archive_flag_spec= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + include_expsyms= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. 
+ exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + link_all_deplibs=no + ;; + esac + + ld_shlibs=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; + *\ \(GNU\ Binutils\)\ [3-9]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. 
Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + export_dynamic_flag_spec='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + whole_archive_flag_spec=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + whole_archive_flag_spec= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[3-9]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + allow_undefined_flag=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + ld_shlibs=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, + # as there is no search path for DLLs. 
+ hardcode_libdir_flag_spec='-L$libdir' + export_dynamic_flag_spec='$wl--export-all-symbols' + allow_undefined_flag=unsupported + always_export_symbols=no + enable_shared_with_static_runtimes=yes + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' + exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + ld_shlibs=no + fi + ;; + + haiku*) + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + link_all_deplibs=yes + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> 
$output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + file_list_spec='@' + ;; + + interix[3-9]*) + hardcode_direct=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + archive_expsym_cmds='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + whole_archive_flag_spec= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) + 
tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + whole_archive_flag_spec='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + whole_archive_flag_spec='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + compiler_needs_object=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + export_dynamic_flag_spec='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs 
$linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + ld_shlibs=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) + ld_shlibs=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. 
+ +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + ;; + + sunos4*) + archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + ld_shlibs=no + fi + ;; + esac + + if test no = "$ld_shlibs"; then + runpath_var= + hardcode_libdir_flag_spec= + export_dynamic_flag_spec= + whole_archive_flag_spec= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + allow_undefined_flag=unsupported + always_export_symbols=yes + archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ hardcode_minus_L=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + hardcode_direct=unsupported + fi + ;; + + aix[4-9]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + export_symbols_cmds='`func_echo_all $NM | $SED -e '\''s/B\([^B]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && (substr(\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + archive_cmds='' + hardcode_direct=yes + hardcode_direct_absolute=yes + hardcode_libdir_separator=':' + link_all_deplibs=yes + file_list_spec='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ hardcode_direct=no + hardcode_direct_absolute=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[012]|aix4.[012].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + hardcode_direct=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + hardcode_minus_L=yes + hardcode_libdir_flag_spec='-L$libdir' + hardcode_libdir_separator= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + export_dynamic_flag_spec='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + always_export_symbols=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. 
+ allow_undefined_flag='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + hardcode_libdir_flag_spec='$wl-R $libdir:/usr/lib:/lib' + allow_undefined_flag="-z nodefs" + archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. 
+ if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + if test ${lt_cv_aix_libpath_+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + + lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }' + lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + if test -z "$lt_cv_aix_libpath_"; then + lt_cv_aix_libpath_=/usr/lib:/lib + fi + +fi + + aix_libpath=$lt_cv_aix_libpath_ +fi + + hardcode_libdir_flag_spec='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + no_undefined_flag=' $wl-bernotok' + allow_undefined_flag=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. + whole_archive_flag_spec='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + whole_archive_flag_spec='$convenience' + fi + archive_cmds_need_lc=yes + archive_expsym_cmds='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([, ]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. 
+ archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + archive_expsym_cmds="$archive_expsym_cmds"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! $soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + archive_expsym_cmds="$archive_expsym_cmds"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + archive_expsym_cmds="$archive_expsym_cmds"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + archive_expsym_cmds='' + ;; + m68k) + archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + ;; + esac + ;; + + 
bsdi[45]*) + export_dynamic_flag_spec=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + always_export_symbols=yes + file_list_spec='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + archive_expsym_cmds='if test DEF = "`$SED -n -e '\''s/^[ ]*//'\'' -e '\''/^\(;.*\)*$/d'\'' -e '\''s/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p'\'' -e q $export_symbols`" ; then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, )='true' + enable_shared_with_static_runtimes=yes + exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + old_postinstall_cmds='chmod 644 $oldlib' + postlink_cmds='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + hardcode_libdir_flag_spec=' ' + allow_undefined_flag=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + old_archive_from_new_cmds='true' + # FIXME: Should let the user specify the lib program. 
+ old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' + enable_shared_with_static_runtimes=yes + ;; + esac + ;; + + darwin* | rhapsody*) + + + archive_cmds_need_lc=no + hardcode_direct=no + hardcode_automatic=yes + hardcode_shlibpath_var=unsupported + if test yes = "$lt_cv_ld_force_load"; then + whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' + + else + whole_archive_flag_spec='' + fi + link_all_deplibs=yes + allow_undefined_flag=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + archive_expsym_cmds="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + module_expsym_cmds="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + + else + ld_shlibs=no + fi + + ;; + + dgux*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. 
Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | dragonfly* | midnightbsd*) + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + hpux9*) + if test yes = "$GCC"; then + archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + export_dynamic_flag_spec='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + hardcode_minus_L=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + archive_cmds='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + archive_cmds='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + archive_cmds='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + archive_cmds='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 +printf %s "checking if $CC understands -b... 
" >&6; } +if test ${lt_cv_prog_compiler__b+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_prog_compiler__b=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -b" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&5 + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + lt_cv_prog_compiler__b=yes + fi + else + lt_cv_prog_compiler__b=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 +printf "%s\n" "$lt_cv_prog_compiler__b" >&6; } + +if test yes = "$lt_cv_prog_compiler__b"; then + archive_cmds='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' +else + archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' +fi + + ;; + esac + fi + if test no = "$with_gnu_ld"; then + hardcode_libdir_flag_spec='$wl+b $wl$libdir' + hardcode_libdir_separator=: + + case $host_cpu in + hppa*64*|ia64*) + hardcode_direct=no + hardcode_shlibpath_var=no + ;; + *) + hardcode_direct=yes + hardcode_direct_absolute=yes + export_dynamic_flag_spec='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ hardcode_minus_L=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 +printf %s "checking whether the $host_os linker accepts -exported_symbol... " >&6; } +if test ${lt_cv_irix_exported_symbol+y} +then : + printf %s "(cached) " >&6 +else $as_nop + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int foo (void) { return 0; } +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + lt_cv_irix_exported_symbol=yes +else $as_nop + lt_cv_irix_exported_symbol=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 +printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; } + if test yes = "$lt_cv_irix_exported_symbol"; then + archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + link_all_deplibs=no + else + archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + inherit_rpath=yes + link_all_deplibs=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + ld_shlibs=yes + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + ;; + esac + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_direct=yes + hardcode_shlibpath_var=no + ;; + + newsos6) 
+ archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + hardcode_shlibpath_var=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + hardcode_direct=yes + hardcode_shlibpath_var=no + hardcode_direct_absolute=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + export_dynamic_flag_spec='$wl-E' + else + archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + hardcode_libdir_flag_spec='$wl-rpath,$libdir' + fi + else + ld_shlibs=no + fi + ;; + + os2*) + hardcode_libdir_flag_spec='-L$libdir' + hardcode_minus_L=yes + allow_undefined_flag=unsupported + shrext_cmds=.dll + archive_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + archive_expsym_cmds='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + 
prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + old_archive_From_new_cmds='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + enable_shared_with_static_runtimes=yes + file_list_spec='@' + ;; + + osf3*) + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + archive_cmds_need_lc='no' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + hardcode_libdir_separator=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + allow_undefined_flag=' $wl-expect_unresolved $wl\*' + archive_cmds='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + hardcode_libdir_flag_spec='$wl-rpath $wl$libdir' + else + allow_undefined_flag=' -expect_unresolved \*' + archive_cmds='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" 
>> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + hardcode_libdir_flag_spec='-rpath $libdir' + fi + archive_cmds_need_lc='no' + hardcode_libdir_separator=: + ;; + + solaris*) + no_undefined_flag=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + archive_cmds='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + archive_cmds='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + archive_cmds='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + hardcode_libdir_flag_spec='-R$libdir' + hardcode_shlibpath_var=no + case $host_os in + solaris2.[0-5] | solaris2.[0-5].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. 
GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) + if test yes = "$GCC"; then + whole_archive_flag_spec='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + whole_archive_flag_spec='-z allextract$convenience -z defaultextract' + fi + ;; + esac + link_all_deplibs=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + archive_cmds='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + hardcode_libdir_flag_spec='-L$libdir' + hardcode_direct=yes + hardcode_minus_L=yes + hardcode_shlibpath_var=no + ;; + + sysv4) + case $host_vendor in + sni) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' + reload_cmds='$CC -r -o $output$reload_objs' + hardcode_direct=no + ;; + motorola) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_direct=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + hardcode_shlibpath_var=no + ;; + + sysv4.3*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + export_dynamic_flag_spec='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_shlibpath_var=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + ld_shlibs=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) + no_undefined_flag='$wl-z,text' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ no_undefined_flag='$wl-z,text' + allow_undefined_flag='$wl-z,nodefs' + archive_cmds_need_lc=no + hardcode_shlibpath_var=no + hardcode_libdir_flag_spec='$wl-R,$libdir' + hardcode_libdir_separator=':' + link_all_deplibs=yes + export_dynamic_flag_spec='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + archive_cmds='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + archive_cmds='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + archive_expsym_cmds='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; + + *) + ld_shlibs=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + export_dynamic_flag_spec='$wl-Blargedynsym' + ;; + esac + fi + fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 +printf "%s\n" "$ld_shlibs" >&6; } +test no = "$ld_shlibs" && can_build_shared=no + +with_gnu_ld=$with_gnu_ld + + + + + + + + + + + + + + + +# +# Do we need to explicitly link libc? +# +case "x$archive_cmds_need_lc" in +x|xyes) + # Assume -lc should be added + archive_cmds_need_lc=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $archive_cmds in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 +printf %s "checking whether -lc should be explicitly linked in... 
" >&6; } +if test ${lt_cv_archive_cmds_need_lc+y} +then : + printf %s "(cached) " >&6 +else $as_nop + $RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$lt_prog_compiler_wl + pic_flag=$lt_prog_compiler_pic + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$allow_undefined_flag + allow_undefined_flag= + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 + (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + then + lt_cv_archive_cmds_need_lc=no + else + lt_cv_archive_cmds_need_lc=yes + fi + allow_undefined_flag=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 +printf "%s\n" "$lt_cv_archive_cmds_need_lc" >&6; } + archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc + ;; + esac + fi + ;; +esac + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 +printf %s "checking dynamic linker characteristics... 
" >&6; } + +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq='s|=\([A-Za-z]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). 
+ case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[lt_foo]++; } + if (lt_freq[lt_foo] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([A-Za-z]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + + + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[4-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a(lib.so.V)' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V($shared_archive_member_spec.o), lib.a(lib.so.V)" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a(lib.so.V), lib.so.V($shared_archive_member_spec.o)" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[.]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' + + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[23].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
+ library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[3-9]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + hardcode_libdir_flag_spec='-L$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + if test ${lt_cv_shlibpath_overrides_runpath+y} +then : + printf %s "(cached) " >&6 +else $as_nop + lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ + LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null +then : + lt_cv_shlibpath_overrides_runpath=yes +fi +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + +fi + + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. 
+ # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directores which are + # searched for libraries, however this is still not possible. Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 +printf "%s\n" "$dynamic_linker" >&6; } +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 +printf %s "checking how to hardcode library paths into programs... " >&6; } +hardcode_action= +if test -n "$hardcode_libdir_flag_spec" || + test -n "$runpath_var" || + test yes = "$hardcode_automatic"; then + + # We can hardcode non-existent directories. + if test no != "$hardcode_direct" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, )" && + test no != "$hardcode_minus_L"; then + # Linking always hardcodes the temporary library directory. + hardcode_action=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + hardcode_action=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ hardcode_action=unsupported +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 +printf "%s\n" "$hardcode_action" >&6; } + +if test relink = "$hardcode_action" || + test yes = "$inherit_rpath"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi + + + + + + + if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +printf %s "checking for dlopen in -ldl... " >&6; } +if test ${ac_cv_lib_dl_dlopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dlopen (); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dl_dlopen=yes +else $as_nop + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else $as_nop + + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + +fi + + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" +if test "x$ac_cv_func_shl_load" = xyes +then : + lt_cv_dlopen=shl_load +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 +printf %s "checking for shl_load in -ldld... " >&6; } +if test ${ac_cv_lib_dld_shl_load+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char shl_load (); +int +main (void) +{ +return shl_load (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dld_shl_load=yes +else $as_nop + ac_cv_lib_dld_shl_load=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 +printf "%s\n" "$ac_cv_lib_dld_shl_load" >&6; } +if test "x$ac_cv_lib_dld_shl_load" = xyes +then : + lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld +else $as_nop + ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" +if test "x$ac_cv_func_dlopen" = xyes +then : + lt_cv_dlopen=dlopen +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 +printf %s "checking for dlopen in -ldl... " >&6; } +if test ${ac_cv_lib_dl_dlopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char dlopen (); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dl_dlopen=yes +else $as_nop + ac_cv_lib_dl_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; } +if test "x$ac_cv_lib_dl_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 +printf %s "checking for dlopen in -lsvld... 
" >&6; } +if test ${ac_cv_lib_svld_dlopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char dlopen (); +int +main (void) +{ +return dlopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_svld_dlopen=yes +else $as_nop + ac_cv_lib_svld_dlopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 +printf "%s\n" "$ac_cv_lib_svld_dlopen" >&6; } +if test "x$ac_cv_lib_svld_dlopen" = xyes +then : + lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 +printf %s "checking for dld_link in -ldld... " >&6; } +if test ${ac_cv_lib_dld_dld_link+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char dld_link (); +int +main (void) +{ +return dld_link (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_dld_dld_link=yes +else $as_nop + ac_cv_lib_dld_dld_link=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 +printf "%s\n" "$ac_cv_lib_dld_dld_link" >&6; } +if test "x$ac_cv_lib_dld_dld_link" = xyes +then : + lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld +fi + + +fi + + +fi + + +fi + + +fi + + +fi + + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 +printf %s "checking whether a program can dlopen itself... " >&6; } +if test ${lt_cv_dlopen_self+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. */ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? 
+ case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self=no + fi +fi +rm -fr conftest* + + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 +printf "%s\n" "$lt_cv_dlopen_self" >&6; } + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 +printf %s "checking whether a statically linked program can dlopen itself... " >&6; } +if test ${lt_cv_dlopen_self_static+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test yes = "$cross_compiling"; then : + lt_cv_dlopen_self_static=cross +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +} +_LT_EOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&5 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; + x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; + esac + else : + # compilation failed + lt_cv_dlopen_self_static=no + fi +fi +rm -fr conftest* + + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 +printf "%s\n" "$lt_cv_dlopen_self_static" >&6; } + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi + + + + + + + + + + + + + + + + + +striplib= +old_striplib= +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 +printf %s "checking whether stripping libraries is possible... 
" >&6; } +if test -z "$STRIP"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +else + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough + striplib="$STRIP -x" + old_striplib="$STRIP -S" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + fi + ;; + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + ;; + esac + fi +fi + + + + + + + + + + + + + # Report what library types will actually be built + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 +printf %s "checking if libtool supports shared libraries... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 +printf "%s\n" "$can_build_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 +printf %s "checking whether to build shared libraries... " >&6; } + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + + aix[4-9]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 +printf "%s\n" "$enable_shared" >&6; } + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 +printf %s "checking whether to build static libraries... " >&6; } + # Make sure either enable_shared or enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 +printf "%s\n" "$enable_static" >&6; } + + + + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +CC=$lt_save_CC + + + + + + + + + + + + + + + + ac_config_commands="$ac_config_commands libtool" + + + + +# Only expand once: + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 +printf %s "checking whether ln -s works... " >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 +printf "%s\n" "no, using $LN_S" >&6; } +fi + + +# Check whether --enable-largefile was given. 
+if test ${enable_largefile+y} +then : + enableval=$enable_largefile; +fi + +if test "$enable_largefile" != no; then + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for special C compiler options needed for large files" >&5 +printf %s "checking for special C compiler options needed for large files... " >&6; } +if test ${ac_cv_sys_largefile_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_sys_largefile_CC=no + if test "$GCC" != yes; then + ac_save_CC=$CC + while :; do + # IRIX 6.2 and later do not support large files by default, + # so use the C compiler's -n32 option if that helps. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF + if ac_fn_c_try_compile "$LINENO" +then : + break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + CC="$CC -n32" + if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_largefile_CC=' -n32'; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + break + done + CC=$ac_save_CC + rm -f conftest.$ac_ext + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_largefile_CC" >&5 +printf "%s\n" "$ac_cv_sys_largefile_CC" >&6; } + if test "$ac_cv_sys_largefile_CC" != no; then + CC=$CC$ac_cv_sys_largefile_CC + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _FILE_OFFSET_BITS value needed for large files" >&5 +printf %s "checking for _FILE_OFFSET_BITS value needed for large files... 
" >&6; } +if test ${ac_cv_sys_file_offset_bits+y} +then : + printf %s "(cached) " >&6 +else $as_nop + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_file_offset_bits=no; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _FILE_OFFSET_BITS 64 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 
1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_file_offset_bits=64; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_cv_sys_file_offset_bits=unknown + break +done +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_file_offset_bits" >&5 +printf "%s\n" "$ac_cv_sys_file_offset_bits" >&6; } +case $ac_cv_sys_file_offset_bits in #( + no | unknown) ;; + *) +printf "%s\n" "#define _FILE_OFFSET_BITS $ac_cv_sys_file_offset_bits" >>confdefs.h +;; +esac +rm -rf conftest* + if test $ac_cv_sys_file_offset_bits = unknown; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _LARGE_FILES value needed for large files" >&5 +printf %s "checking for _LARGE_FILES value needed for large files... " >&6; } +if test ${ac_cv_sys_large_files+y} +then : + printf %s "(cached) " >&6 +else $as_nop + while :; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. */ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_large_files=no; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#define _LARGE_FILES 1 +#include + /* Check that off_t can represent 2**63 - 1 correctly. + We can't simply define LARGE_OFF_T to be 9223372036854775807, + since some C++ compilers masquerading as C compilers + incorrectly reject 9223372036854775807. 
*/ +#define LARGE_OFF_T (((off_t) 1 << 31 << 31) - 1 + ((off_t) 1 << 31 << 31)) + int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 + && LARGE_OFF_T % 2147483647 == 1) + ? 1 : -1]; +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_sys_large_files=1; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_cv_sys_large_files=unknown + break +done +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sys_large_files" >&5 +printf "%s\n" "$ac_cv_sys_large_files" >&6; } +case $ac_cv_sys_large_files in #( + no | unknown) ;; + *) +printf "%s\n" "#define _LARGE_FILES $ac_cv_sys_large_files" >>confdefs.h +;; +esac +rm -rf conftest* + fi +fi + + +# Check for GCC visibility feature + + + + VISIBILITY_CFLAGS= + HAVE_VISIBILITY=0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the -Werror option is usable" >&5 +printf %s "checking whether the -Werror option is usable... " >&6; } + if test ${pcre2_cv_cc_vis_werror+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + pcre2_cv_cc_vis_werror=no + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + #warning e + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +else $as_nop + pcre2_cv_cc_vis_werror=yes + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_vis_werror" >&5 +printf "%s\n" "$pcre2_cv_cc_vis_werror" >&6; } + if test -n "$pcre2_cv_cc_vis_werror" && test $pcre2_cv_cc_vis_werror = yes + then + WORKING_WERROR=1 + else + WORKING_WERROR=0 + fi + if test $pcre2_cv_cc_vis_werror = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GCC compatible visibility declarations" >&5 +printf %s "checking for GCC compatible visibility declarations... " >&6; } + if test ${pcre2_cv_cc_visibility+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror -fvisibility=hidden" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + extern __attribute__((__visibility__("default"))) int exportedfunc (void); + void dummyfunc (void) {} + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cv_cc_visibility=yes +else $as_nop + pcre2_cv_cc_visibility=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS="$pcre2_save_CFLAGS" +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cv_cc_visibility" >&5 +printf "%s\n" "$pcre2_cv_cc_visibility" >&6; } + fi + if test -n "$pcre2_cv_cc_visibility" && test $pcre2_cv_cc_visibility = yes + then + VISIBILITY_CFLAGS="-fvisibility=hidden" + HAVE_VISIBILITY=1 + +printf "%s\n" "#define PCRE2_EXPORT __attribute__ ((visibility (\"default\")))" >>confdefs.h + + else + +printf "%s\n" "#define PCRE2_EXPORT /**/" >>confdefs.h + + fi + + + +printf "%s\n" "#define HAVE_VISIBILITY $HAVE_VISIBILITY" >>confdefs.h + + + +# Check for Clang __attribute__((uninitialized)) feature + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __attribute__((uninitialized))" >&5 +printf %s "checking for __attribute__((uninitialized))... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +tmp_CFLAGS=$CFLAGS +if test $WORKING_WERROR -eq 1; then + CFLAGS="$CFLAGS -Werror" +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +char buf[128] __attribute__((uninitialized));(void)buf + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cc_cv_attribute_uninitialized=yes +else $as_nop + pcre2_cc_cv_attribute_uninitialized=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_attribute_uninitialized" >&5 +printf "%s\n" "$pcre2_cc_cv_attribute_uninitialized" >&6; } +if test "$pcre2_cc_cv_attribute_uninitialized" = yes; then + +printf "%s\n" "#define HAVE_ATTRIBUTE_UNINITIALIZED 1" >>confdefs.h + +fi +CFLAGS=$tmp_CFLAGS +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Check for the assume() builtin + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __assume()" >&5 +printf %s "checking for __assume()... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +__assume(1) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pcre2_cc_cv_builtin_assume=yes +else $as_nop + pcre2_cc_cv_builtin_assume=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_assume" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_assume" >&6; } +if test "$pcre2_cc_cv_builtin_assume" = yes; then + +printf "%s\n" "#define HAVE_BUILTIN_ASSUME 1" >>confdefs.h + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Check for the mul_overflow() builtin + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_mul_overflow()" >&5 +printf %s "checking for __builtin_mul_overflow()... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #ifdef HAVE_SYS_TYPES_H + #include + #endif + #include + + int a, b; + size_t m; + +int +main (void) +{ +__builtin_mul_overflow(a, b, &m) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pcre2_cc_cv_builtin_mul_overflow=yes +else $as_nop + pcre2_cc_cv_builtin_mul_overflow=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_mul_overflow" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_mul_overflow" >&6; } +if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then + +printf "%s\n" "#define HAVE_BUILTIN_MUL_OVERFLOW 1" >>confdefs.h + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Check for the unreachable() builtin + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_unreachable()" >&5 +printf %s "checking for __builtin_unreachable()... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +int r; +int +main (void) +{ +if (r) __builtin_unreachable() + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pcre2_cc_cv_builtin_unreachable=yes +else $as_nop + pcre2_cc_cv_builtin_unreachable=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_builtin_unreachable" >&5 +printf "%s\n" "$pcre2_cc_cv_builtin_unreachable" >&6; } +if test "$pcre2_cc_cv_builtin_unreachable" = yes; then + +printf "%s\n" "#define HAVE_BUILTIN_UNREACHABLE 1" >>confdefs.h + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Versioning + +PCRE2_MAJOR="10" +PCRE2_MINOR="45" +PCRE2_PRERELEASE="" +PCRE2_DATE="2025-02-05" + +if test "$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" +then + echo "***" + echo "*** Minor version number $PCRE2_MINOR must not be used. ***" + echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***" + echo "***" + exit 1 +fi + + + + + + +# Set a more sensible default value for $(htmldir). +if test "x$htmldir" = 'x${docdir}' +then + htmldir='${docdir}/html' +fi + +# Force an error for PCRE1 size options +# Check whether --enable-pcre8 was given. +if test ${enable_pcre8+y} +then : + enableval=$enable_pcre8; +else $as_nop + enable_pcre8=no +fi + +# Check whether --enable-pcre16 was given. +if test ${enable_pcre16+y} +then : + enableval=$enable_pcre16; +else $as_nop + enable_pcre16=no +fi + +# Check whether --enable-pcre32 was given. 
+if test ${enable_pcre32+y} +then : + enableval=$enable_pcre32; +else $as_nop + enable_pcre32=no +fi + + +if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" +then + echo "** ERROR: Use --[en|dis]able-pcre2-[8|16|32], not --[en|dis]able-pcre[8|16|32]" + exit 1 +fi + +# Handle --disable-pcre2-8 (enabled by default) +# Check whether --enable-pcre2-8 was given. +if test ${enable_pcre2_8+y} +then : + enableval=$enable_pcre2_8; +else $as_nop + enable_pcre2_8=unset +fi + + + +# Handle --enable-pcre2-16 (disabled by default) +# Check whether --enable-pcre2-16 was given. +if test ${enable_pcre2_16+y} +then : + enableval=$enable_pcre2_16; +else $as_nop + enable_pcre2_16=unset +fi + + + +# Handle --enable-pcre2-32 (disabled by default) +# Check whether --enable-pcre2-32 was given. +if test ${enable_pcre2_32+y} +then : + enableval=$enable_pcre2_32; +else $as_nop + enable_pcre2_32=unset +fi + + + +# Handle --enable-debug (disabled by default) +# Check whether --enable-debug was given. +if test ${enable_debug+y} +then : + enableval=$enable_debug; +else $as_nop + enable_debug=no +fi + + +# Handle --enable-jit (disabled by default) +# Check whether --enable-jit was given. +if test ${enable_jit+y} +then : + enableval=$enable_jit; +else $as_nop + enable_jit=no +fi + + +# This code enables JIT if the hardware supports it. +if test "$enable_jit" = "auto"; then + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #define SLJIT_CONFIG_AUTO 1 + #include "deps/sljit/sljit_src/sljitConfigCPU.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + enable_jit=yes +else $as_nop + enable_jit=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... $enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + # Check whether --enable-jit-sealloc was given. +if test ${enable_jit_sealloc+y} +then : + enableval=$enable_jit_sealloc; +else $as_nop + enable_jit_sealloc=no +fi + + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +# Check whether --enable-pcre2grep-jit was given. +if test ${enable_pcre2grep_jit+y} +then : + enableval=$enable_pcre2grep_jit; +else $as_nop + enable_pcre2grep_jit=yes +fi + + +# Handle --disable-pcre2grep-callout (enabled by default) +# Check whether --enable-pcre2grep-callout was given. +if test ${enable_pcre2grep_callout+y} +then : + enableval=$enable_pcre2grep_callout; +else $as_nop + enable_pcre2grep_callout=yes +fi + + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +# Check whether --enable-pcre2grep-callout-fork was given. +if test ${enable_pcre2grep_callout_fork+y} +then : + enableval=$enable_pcre2grep_callout_fork; +else $as_nop + enable_pcre2grep_callout_fork=yes +fi + + +# Handle --enable-rebuild-chartables +# Check whether --enable-rebuild-chartables was given. +if test ${enable_rebuild_chartables+y} +then : + enableval=$enable_rebuild_chartables; +else $as_nop + enable_rebuild_chartables=no +fi + + +# Handle --disable-unicode (enabled by default) +# Check whether --enable-unicode was given. 
+if test ${enable_unicode+y} +then : + enableval=$enable_unicode; +else $as_nop + enable_unicode=unset +fi + + +# Handle newline options +ac_pcre2_newline=lf +# Check whether --enable-newline-is-cr was given. +if test ${enable_newline_is_cr+y} +then : + enableval=$enable_newline_is_cr; ac_pcre2_newline=cr +fi + +# Check whether --enable-newline-is-lf was given. +if test ${enable_newline_is_lf+y} +then : + enableval=$enable_newline_is_lf; ac_pcre2_newline=lf +fi + +# Check whether --enable-newline-is-crlf was given. +if test ${enable_newline_is_crlf+y} +then : + enableval=$enable_newline_is_crlf; ac_pcre2_newline=crlf +fi + +# Check whether --enable-newline-is-anycrlf was given. +if test ${enable_newline_is_anycrlf+y} +then : + enableval=$enable_newline_is_anycrlf; ac_pcre2_newline=anycrlf +fi + +# Check whether --enable-newline-is-any was given. +if test ${enable_newline_is_any+y} +then : + enableval=$enable_newline_is_any; ac_pcre2_newline=any +fi + +# Check whether --enable-newline-is-nul was given. +if test ${enable_newline_is_nul+y} +then : + enableval=$enable_newline_is_nul; ac_pcre2_newline=nul +fi + +enable_newline="$ac_pcre2_newline" + +# Handle --enable-bsr-anycrlf +# Check whether --enable-bsr-anycrlf was given. +if test ${enable_bsr_anycrlf+y} +then : + enableval=$enable_bsr_anycrlf; +else $as_nop + enable_bsr_anycrlf=no +fi + + +# Handle --enable-never-backslash-C +# Check whether --enable-never-backslash-C was given. +if test ${enable_never_backslash_C+y} +then : + enableval=$enable_never_backslash_C; +else $as_nop + enable_never_backslash_C=no +fi + + +# Handle --enable-ebcdic +# Check whether --enable-ebcdic was given. +if test ${enable_ebcdic+y} +then : + enableval=$enable_ebcdic; +else $as_nop + enable_ebcdic=no +fi + + +# Handle --enable-ebcdic-nl25 +# Check whether --enable-ebcdic-nl25 was given. 
+if test ${enable_ebcdic_nl25+y} +then : + enableval=$enable_ebcdic_nl25; +else $as_nop + enable_ebcdic_nl25=no +fi + + +# Handle --enable-pcre2grep-libz +# Check whether --enable-pcre2grep-libz was given. +if test ${enable_pcre2grep_libz+y} +then : + enableval=$enable_pcre2grep_libz; +else $as_nop + enable_pcre2grep_libz=no +fi + + +# Handle --enable-pcre2grep-libbz2 +# Check whether --enable-pcre2grep-libbz2 was given. +if test ${enable_pcre2grep_libbz2+y} +then : + enableval=$enable_pcre2grep_libbz2; +else $as_nop + enable_pcre2grep_libbz2=no +fi + + +# Handle --with-pcre2grep-bufsize=N + +# Check whether --with-pcre2grep-bufsize was given. +if test ${with_pcre2grep_bufsize+y} +then : + withval=$with_pcre2grep_bufsize; +else $as_nop + with_pcre2grep_bufsize=20480 +fi + + +# Handle --with-pcre2grep-max-bufsize=N + +# Check whether --with-pcre2grep-max-bufsize was given. +if test ${with_pcre2grep_max_bufsize+y} +then : + withval=$with_pcre2grep_max_bufsize; +else $as_nop + with_pcre2grep_max_bufsize=1048576 +fi + + +# Handle --enable-pcre2test-libedit +# Check whether --enable-pcre2test-libedit was given. +if test ${enable_pcre2test_libedit+y} +then : + enableval=$enable_pcre2test_libedit; +else $as_nop + enable_pcre2test_libedit=no +fi + + +# Handle --enable-pcre2test-libreadline +# Check whether --enable-pcre2test-libreadline was given. +if test ${enable_pcre2test_libreadline+y} +then : + enableval=$enable_pcre2test_libreadline; +else $as_nop + enable_pcre2test_libreadline=no +fi + + +# Handle --with-link-size=N + +# Check whether --with-link-size was given. +if test ${with_link_size+y} +then : + withval=$with_link_size; +else $as_nop + with_link_size=2 +fi + + +# Handle --with-max-varlookbehind=N + +# Check whether --with-max-varlookbehind was given. 
+if test ${with_max_varlookbehind+y} +then : + withval=$with_max_varlookbehind; +else $as_nop + with_max_varlookbehind=255 +fi + + +# Handle --with-parens-nest-limit=N + +# Check whether --with-parens-nest-limit was given. +if test ${with_parens_nest_limit+y} +then : + withval=$with_parens_nest_limit; +else $as_nop + with_parens_nest_limit=250 +fi + + +# Handle --with-heap-limit + +# Check whether --with-heap-limit was given. +if test ${with_heap_limit+y} +then : + withval=$with_heap_limit; +else $as_nop + with_heap_limit=20000000 +fi + + +# Handle --with-match-limit=N + +# Check whether --with-match-limit was given. +if test ${with_match_limit+y} +then : + withval=$with_match_limit; +else $as_nop + with_match_limit=10000000 +fi + + +# Handle --with-match-limit-depth=N +# Recognize old synonym --with-match-limit-recursion +# +# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH symbolically as +# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g. +# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric +# value (or even the same numeric value as MATCH_LIMIT, though no longer +# defined in terms of the latter). +# + +# Check whether --with-match-limit-depth was given. +if test ${with_match_limit_depth+y} +then : + withval=$with_match_limit_depth; +else $as_nop + with_match_limit_depth=MATCH_LIMIT +fi + + + +# Check whether --with-match-limit-recursion was given. +if test ${with_match_limit_recursion+y} +then : + withval=$with_match_limit_recursion; +else $as_nop + with_match_limit_recursion=UNSET +fi + + +# Handle --enable-valgrind +# Check whether --enable-valgrind was given. +if test ${enable_valgrind+y} +then : + enableval=$enable_valgrind; +else $as_nop + enable_valgrind=no +fi + + +# Enable code coverage reports using gcov +# Check whether --enable-coverage was given. 
+if test ${enable_coverage+y} +then : + enableval=$enable_coverage; +else $as_nop + enable_coverage=no +fi + + +# Handle --enable-fuzz-support +# Check whether --enable-fuzz_support was given. +if test ${enable_fuzz_support+y} +then : + enableval=$enable_fuzz_support; +else $as_nop + enable_fuzz_support=no +fi + + +# Handle --enable-diff-fuzz-support +# Check whether --enable-diff_fuzz_support was given. +if test ${enable_diff_fuzz_support+y} +then : + enableval=$enable_diff_fuzz_support; +else $as_nop + enable_diff_fuzz_support=no +fi + + +# Handle --disable-stack-for-recursion +# This option became obsolete at release 10.30. +# Check whether --enable-stack-for-recursion was given. +if test ${enable_stack_for_recursion+y} +then : + enableval=$enable_stack_for_recursion; +else $as_nop + enable_stack_for_recursion=yes +fi + + +# Original code +# AC_ARG_ENABLE(stack-for-recursion, +# AS_HELP_STRING([--disable-stack-for-recursion], +# [don't use stack recursion when matching]), +# , enable_stack_for_recursion=yes) + +# Handle --disable-percent_zt (set as "auto" by default) +# Check whether --enable-percent-zt was given. +if test ${enable_percent_zt+y} +then : + enableval=$enable_percent_zt; +else $as_nop + enable_percent_zt=auto +fi + + +# Set the default value for pcre2-8 +if test "x$enable_pcre2_8" = "xunset" +then + enable_pcre2_8=yes +fi + +# Set the default value for pcre2-16 +if test "x$enable_pcre2_16" = "xunset" +then + enable_pcre2_16=no +fi + +# Set the default value for pcre2-32 +if test "x$enable_pcre2_32" = "xunset" +then + enable_pcre2_32=no +fi + +# Make sure at least one library is selected +if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono" +then + as_fn_error $? "At least one of the 8, 16 or 32 bit libraries must be enabled" "$LINENO" 5 +fi + +# Unicode is enabled by default. +if test "x$enable_unicode" = "xunset" +then + enable_unicode=yes +fi + +# Convert the newline identifier into the appropriate integer value. 
These must +# agree with the PCRE2_NEWLINE_xxx values in pcre2.h. + +case "$enable_newline" in + cr) ac_pcre2_newline_value=1 ;; + lf) ac_pcre2_newline_value=2 ;; + crlf) ac_pcre2_newline_value=3 ;; + any) ac_pcre2_newline_value=4 ;; + anycrlf) ac_pcre2_newline_value=5 ;; + nul) ac_pcre2_newline_value=6 ;; + *) + as_fn_error $? "invalid argument \"$enable_newline\" to --enable-newline option" "$LINENO" 5 + ;; +esac + +# --enable-ebcdic-nl25 implies --enable-ebcdic +if test "x$enable_ebcdic_nl25" = "xyes"; then + enable_ebcdic=yes +fi + +# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. +# Also check that UTF support is not requested, because PCRE2 cannot handle +# EBCDIC and UTF in the same build. To do so it would need to use different +# character constants depending on the mode. Also, EBCDIC cannot be used with +# 16-bit and 32-bit libraries. +# +if test "x$enable_ebcdic" = "xyes"; then + enable_rebuild_chartables=yes + if test "x$enable_unicode" = "xyes"; then + as_fn_error $? "support for EBCDIC and Unicode cannot be enabled at the same time" "$LINENO" 5 + fi + if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then + as_fn_error $? "EBCDIC support is available only for the 8-bit library" "$LINENO" 5 + fi +fi + +# Check argument to --with-link-size +case "$with_link_size" in + 2|3|4) ;; + *) + as_fn_error $? "invalid argument \"$with_link_size\" to --with-link-size option" "$LINENO" 5 + ;; +esac + + + +# Checks for header files. 
+ac_fn_c_check_header_compile "$LINENO" "assert.h" "ac_cv_header_assert_h" "$ac_includes_default" +if test "x$ac_cv_header_assert_h" = xyes +then : + printf "%s\n" "#define HAVE_ASSERT_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "limits.h" "ac_cv_header_limits_h" "$ac_includes_default" +if test "x$ac_cv_header_limits_h" = xyes +then : + printf "%s\n" "#define HAVE_LIMITS_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "sys/types.h" "ac_cv_header_sys_types_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_types_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_TYPES_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "sys/stat.h" "ac_cv_header_sys_stat_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_stat_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_STAT_H 1" >>confdefs.h + +fi +ac_fn_c_check_header_compile "$LINENO" "dirent.h" "ac_cv_header_dirent_h" "$ac_includes_default" +if test "x$ac_cv_header_dirent_h" = xyes +then : + printf "%s\n" "#define HAVE_DIRENT_H 1" >>confdefs.h + +fi + + for ac_header in windows.h +do : + ac_fn_c_check_header_compile "$LINENO" "windows.h" "ac_cv_header_windows_h" "$ac_includes_default" +if test "x$ac_cv_header_windows_h" = xyes +then : + printf "%s\n" "#define HAVE_WINDOWS_H 1" >>confdefs.h + HAVE_WINDOWS_H=1 +fi + +done + for ac_header in sys/wait.h +do : + ac_fn_c_check_header_compile "$LINENO" "sys/wait.h" "ac_cv_header_sys_wait_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_wait_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_WAIT_H 1" >>confdefs.h + HAVE_SYS_WAIT_H=1 +fi + +done + +# Conditional compilation + if test "x$enable_pcre2_8" = "xyes"; then + WITH_PCRE2_8_TRUE= + WITH_PCRE2_8_FALSE='#' +else + WITH_PCRE2_8_TRUE='#' + WITH_PCRE2_8_FALSE= +fi + + if test "x$enable_pcre2_16" = "xyes"; then + WITH_PCRE2_16_TRUE= + WITH_PCRE2_16_FALSE='#' +else + WITH_PCRE2_16_TRUE='#' + WITH_PCRE2_16_FALSE= +fi + + if test "x$enable_pcre2_32" = "xyes"; then + 
WITH_PCRE2_32_TRUE= + WITH_PCRE2_32_FALSE='#' +else + WITH_PCRE2_32_TRUE='#' + WITH_PCRE2_32_FALSE= +fi + + if test "x$enable_rebuild_chartables" = "xyes"; then + WITH_REBUILD_CHARTABLES_TRUE= + WITH_REBUILD_CHARTABLES_FALSE='#' +else + WITH_REBUILD_CHARTABLES_TRUE='#' + WITH_REBUILD_CHARTABLES_FALSE= +fi + + if test "x$enable_jit" = "xyes"; then + WITH_JIT_TRUE= + WITH_JIT_FALSE='#' +else + WITH_JIT_TRUE='#' + WITH_JIT_FALSE= +fi + + if test "x$enable_unicode" = "xyes"; then + WITH_UNICODE_TRUE= + WITH_UNICODE_FALSE='#' +else + WITH_UNICODE_TRUE='#' + WITH_UNICODE_FALSE= +fi + + if test "x$enable_valgrind" = "xyes"; then + WITH_VALGRIND_TRUE= + WITH_VALGRIND_FALSE='#' +else + WITH_VALGRIND_TRUE='#' + WITH_VALGRIND_FALSE= +fi + + if test "x$enable_fuzz_support" = "xyes"; then + WITH_FUZZ_SUPPORT_TRUE= + WITH_FUZZ_SUPPORT_FALSE='#' +else + WITH_FUZZ_SUPPORT_TRUE='#' + WITH_FUZZ_SUPPORT_FALSE= +fi + + if test "x$enable_diff_fuzz_support" = "xyes"; then + WITH_DIFF_FUZZ_SUPPORT_TRUE= + WITH_DIFF_FUZZ_SUPPORT_FALSE='#' +else + WITH_DIFF_FUZZ_SUPPORT_TRUE='#' + WITH_DIFF_FUZZ_SUPPORT_FALSE= +fi + + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +if test "$enable_diff_fuzz_support" = "yes"; then + if test "$enable_fuzz_support" = "no"; then + echo "** ERROR: Differential fuzzing support requires fuzzing support" + exit 1 + fi + if test "$enable_jit" = "no"; then + echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support" + exit 1 + fi + +printf "%s\n" "#define SUPPORT_DIFF_FUZZ /**/" >>confdefs.h + +fi + +# Checks for typedefs, structures, and compiler characteristics. + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 +printf %s "checking for an ANSI C-conforming const... 
" >&6; } +if test ${ac_cv_c_const+y} +then : + printf %s "(cached) " >&6 +else $as_nop + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + +#ifndef __cplusplus + /* Ultrix mips cc rejects this sort of thing. */ + typedef int charset[2]; + const charset cs = { 0, 0 }; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *pcpcc; + char **ppc; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* IBM XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + pcpcc = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++pcpcc; + ppc = (char**) pcpcc; + pcpcc = (char const *const *) ppc; + { /* SCO 3.2v4 cc rejects this sort of thing. */ + char tx; + char *t = &tx; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + if (s) return 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* IBM XL C 1.02.0.0 rejects this sort of thing, saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. 
*/ + struct s { int j; const int *ap[3]; } bx; + struct s *b = &bx; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + if (!foo) return 0; + } + return !cs[0] && !zero.x; +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_c_const=yes +else $as_nop + ac_cv_c_const=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 +printf "%s\n" "$ac_cv_c_const" >&6; } +if test $ac_cv_c_const = no; then + +printf "%s\n" "#define const /**/" >>confdefs.h + +fi + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes +then : + +else $as_nop + +printf "%s\n" "#define size_t unsigned int" >>confdefs.h + +fi + + +# Checks for library functions. + +ac_fn_c_check_func "$LINENO" "bcopy" "ac_cv_func_bcopy" +if test "x$ac_cv_func_bcopy" = xyes +then : + printf "%s\n" "#define HAVE_BCOPY 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "memfd_create" "ac_cv_func_memfd_create" +if test "x$ac_cv_func_memfd_create" = xyes +then : + printf "%s\n" "#define HAVE_MEMFD_CREATE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "memmove" "ac_cv_func_memmove" +if test "x$ac_cv_func_memmove" = xyes +then : + printf "%s\n" "#define HAVE_MEMMOVE 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "mkostemp" "ac_cv_func_mkostemp" +if test "x$ac_cv_func_mkostemp" = xyes +then : + printf "%s\n" "#define HAVE_MKOSTEMP 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "secure_getenv" "ac_cv_func_secure_getenv" +if test "x$ac_cv_func_secure_getenv" = xyes +then : + printf "%s\n" "#define HAVE_SECURE_GETENV 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "strerror" "ac_cv_func_strerror" +if test "x$ac_cv_func_strerror" = xyes +then : + printf "%s\n" "#define HAVE_STRERROR 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for 
realpath" >&5 +printf %s "checking for realpath... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int +main (void) +{ + +char buffer[PATH_MAX]; +realpath(".", buffer); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +printf "%s\n" "#define HAVE_REALPATH 1" >>confdefs.h + + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +# Check for the availability of libz (aka zlib) + + for ac_header in zlib.h +do : + ac_fn_c_check_header_compile "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default" +if test "x$ac_cv_header_zlib_h" = xyes +then : + printf "%s\n" "#define HAVE_ZLIB_H 1" >>confdefs.h + HAVE_ZLIB_H=1 +fi + +done +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gzopen in -lz" >&5 +printf %s "checking for gzopen in -lz... " >&6; } +if test ${ac_cv_lib_z_gzopen+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char gzopen (); +int +main (void) +{ +return gzopen (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_z_gzopen=yes +else $as_nop + ac_cv_lib_z_gzopen=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_gzopen" >&5 +printf "%s\n" "$ac_cv_lib_z_gzopen" >&6; } +if test "x$ac_cv_lib_z_gzopen" = xyes +then : + HAVE_LIBZ=1 +fi + + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test. + + for ac_header in bzlib.h +do : + ac_fn_c_check_header_compile "$LINENO" "bzlib.h" "ac_cv_header_bzlib_h" "$ac_includes_default" +if test "x$ac_cv_header_bzlib_h" = xyes +then : + printf "%s\n" "#define HAVE_BZLIB_H 1" >>confdefs.h + HAVE_BZLIB_H=1 +fi + +done +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libbz2" >&5 +printf %s "checking for libbz2... " >&6; } +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#ifdef HAVE_BZLIB_H +#include +#endif +int +main (void) +{ +return (int)BZ2_bzopen("conftest", "rb"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; };HAVE_LIBBZ2=1; break; +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS="$OLD_LIBS" + +# Check for the availabiity of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + for ac_header in readline/readline.h +do : + ac_fn_c_check_header_compile "$LINENO" "readline/readline.h" "ac_cv_header_readline_readline_h" "$ac_includes_default" +if test "x$ac_cv_header_readline_readline_h" = xyes +then : + printf "%s\n" "#define HAVE_READLINE_READLINE_H 1" >>confdefs.h + HAVE_READLINE_H=1 +fi + +done + for ac_header in readline/history.h +do : + ac_fn_c_check_header_compile "$LINENO" "readline/history.h" "ac_cv_header_readline_history_h" "$ac_includes_default" +if test "x$ac_cv_header_readline_history_h" = xyes +then : + printf "%s\n" "#define HAVE_READLINE_HISTORY_H 1" >>confdefs.h + HAVE_HISTORY_H=1 +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lreadline" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -ltinfo $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-ltinfo" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... 
" >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lcurses $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lcurses" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lncurses $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lncurses" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... " >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -lncursesw $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-lncursesw" +else $as_nop + unset ac_cv_lib_readline_readline; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5 +printf %s "checking for readline in -lreadline... 
" >&6; } +if test ${ac_cv_lib_readline_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lreadline -ltermcap $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_readline_readline=yes +else $as_nop + ac_cv_lib_readline_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5 +printf "%s\n" "$ac_cv_lib_readline_readline" >&6; } +if test "x$ac_cv_lib_readline_readline" = xyes +then : + LIBREADLINE="-ltermcap" +else $as_nop + LIBREADLINE="" +fi + +fi + +fi + +fi + +fi + +fi + + + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones. + +if test "$enable_pcre2test_libedit" = "yes"; then + for ac_header in editline/readline.h edit/readline/readline.h readline.h +do : + as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" +then : + cat >>confdefs.h <<_ACEOF +#define `printf "%s\n" "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + + HAVE_LIBEDIT_HEADER=1 + break + +fi + +done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline in -ledit" >&5 +printf %s "checking for readline in -ledit... 
" >&6; } +if test ${ac_cv_lib_edit_readline+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-ledit $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char readline (); +int +main (void) +{ +return readline (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_edit_readline=yes +else $as_nop + ac_cv_lib_edit_readline=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_edit_readline" >&5 +printf "%s\n" "$ac_cv_lib_edit_readline" >&6; } +if test "x$ac_cv_lib_edit_readline" = xyes +then : + LIBEDIT="-ledit" +fi + +fi + +PCRE2_STATIC_CFLAG="" +if test "x$enable_shared" = "xno" ; then + +printf "%s\n" "#define PCRE2_STATIC 1" >>confdefs.h + + PCRE2_STATIC_CFLAG="-DPCRE2_STATIC" +fi + + +PCRE2POSIX_CFLAG="" +if test "x$enable_shared" = "xyes" ; then + PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED" +fi + + +# Here is where PCRE2-specific defines are handled + +if test "$enable_pcre2_8" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2_8 /**/" >>confdefs.h + +fi + +if test "$enable_pcre2_16" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2_16 /**/" >>confdefs.h + +fi + +if test "$enable_pcre2_32" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2_32 /**/" >>confdefs.h + +fi + +if test "$enable_debug" = "yes"; then + +printf "%s\n" "#define PCRE2_DEBUG /**/" >>confdefs.h + +fi + +if test "$enable_percent_zt" = "no"; then + +printf "%s\n" "#define DISABLE_PERCENT_ZT /**/" >>confdefs.h + +else + enable_percent_zt=auto +fi + +# Unless running under Windows, JIT support requires pthreads. 
+ +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on Tru64 or Sequent). +# It gets checked for in the link test anyway. + +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS" >&5 +printf %s "checking for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char pthread_join (); +int +main (void) +{ +return pthread_join (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. 
DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work without any flags" >&5 +printf %s "checking whether pthreads work without any flags... " >&6; } + ;; + + -*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether pthreads work with $flag" >&5 +printf %s "checking whether pthreads work with $flag... " >&6; } + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + # Extract the first word of "pthread-config", so it can be a program name with args. +set dummy pthread-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ax_pthread_config+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ax_pthread_config"; then + ac_cv_prog_ax_pthread_config="$ax_pthread_config" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ax_pthread_config="yes" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_prog_ax_pthread_config" && ac_cv_prog_ax_pthread_config="no" +fi +fi +ax_pthread_config=$ac_cv_prog_ax_pthread_config +if test -n "$ax_pthread_config"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_config" >&5 +printf "%s\n" "$ax_pthread_config" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for the pthreads library -l$flag" >&5 +printf %s "checking for the pthreads library -l$flag... " >&6; } + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <pthread.h> + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; } +int +main (void) +{ +pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */ + ; + return 0; +} +_ACEOF +# A successful link with the candidate flag currently in LIBS/CFLAGS marks pthreads as usable. +if ac_fn_c_try_link "$LINENO" +then : + ax_pthread_ok=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_pthread_ok" >&5 +printf "%s\n" "$ax_pthread_ok" >&6; } + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for joinable pthread attribute" >&5 +printf %s "checking for joinable pthread attribute... " >&6; } + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <pthread.h> +int +main (void) +{ +int attr = $attr; return attr /* ; */ + ; + return 0; +} +_ACEOF +# The first attribute name whose probe links is recorded in attr_name and the search stops. +if ac_fn_c_try_link "$LINENO" +then : + attr_name=$attr; break +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + done + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $attr_name" >&5 +printf "%s\n" "$attr_name" >&6; } + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + +printf "%s\n" "#define PTHREAD_CREATE_JOINABLE $attr_name" >>confdefs.h + + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if more special flags are required for pthreads" >&5 +printf %s "checking if more special flags are required for pthreads... " >&6; } + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + flag="-mt -D_REENTRANT" + fi + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${flag}" >&5 +printf "%s\n" "${flag}" >&6; } + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PTHREAD_PRIO_INHERIT" >&5 +printf %s "checking for PTHREAD_PRIO_INHERIT... " >&6; } +if test ${ax_cv_PTHREAD_PRIO_INHERIT+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + + #include <pthread.h> +int +main (void) +{ +int i = PTHREAD_PRIO_INHERIT; + ; + return 0; +} +_ACEOF +# Record the outcome of the PTHREAD_PRIO_INHERIT probe in the cache variable. +if ac_fn_c_try_link "$LINENO" +then : + ax_cv_PTHREAD_PRIO_INHERIT=yes +else $as_nop + ax_cv_PTHREAD_PRIO_INHERIT=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_PTHREAD_PRIO_INHERIT" >&5 +printf "%s\n" "$ax_cv_PTHREAD_PRIO_INHERIT" >&6; } + if test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" +then : + +printf "%s\n" "#define HAVE_PTHREAD_PRIO_INHERIT 1" >>confdefs.h + +fi + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + for ac_prog in xlc_r cc_r +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_PTHREAD_CC+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$PTHREAD_CC"; then + ac_cv_prog_PTHREAD_CC="$PTHREAD_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_PTHREAD_CC="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +PTHREAD_CC=$ac_cv_prog_PTHREAD_CC +if test -n "$PTHREAD_CC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PTHREAD_CC" >&5 +printf "%s\n" "$PTHREAD_CC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + test -n "$PTHREAD_CC" && break +done +test -n "$PTHREAD_CC" || PTHREAD_CC="${CC}" + + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + + + + + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + +printf "%s\n" "#define HAVE_PTHREAD 1" >>confdefs.h + + : +else + ax_pthread_ok=no + as_fn_error $? 
"JIT support requires pthreads" "$LINENO" 5 +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + +printf "%s\n" "#define SUPPORT_JIT /**/" >>confdefs.h + +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + +printf "%s\n" "#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 1" >>confdefs.h + +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + +printf "%s\n" "#define SUPPORT_PCRE2GREP_JIT /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + as_fn_error $? "Callout script support needs sys/wait.h." "$LINENO" 5 + fi + fi + +printf "%s\n" "#define SUPPORT_PCRE2GREP_CALLOUT_FORK /**/" >>confdefs.h + + fi + +printf "%s\n" "#define SUPPORT_PCRE2GREP_CALLOUT /**/" >>confdefs.h + +else + enable_pcre2grep_callout_fork="no" +fi + +if test "$enable_unicode" = "yes"; then + +printf "%s\n" "#define SUPPORT_UNICODE /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_libz" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBZ /**/" >>confdefs.h + +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBBZ2 /**/" >>confdefs.h + +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192" >&5 +printf "%s\n" "$as_me: WARNING: $with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192" >&2;} + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + as_fn_error $? 
"Bad value for --with-pcre2grep-bufsize" "$LINENO" 5 + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + as_fn_error $? "Bad value for --with-pcre2grep-max-bufsize" "$LINENO" 5 + fi +fi + + +printf "%s\n" "#define PCRE2GREP_BUFSIZE $with_pcre2grep_bufsize" >>confdefs.h + + + +printf "%s\n" "#define PCRE2GREP_MAX_BUFSIZE $with_pcre2grep_max_bufsize" >>confdefs.h + + +if test "$enable_pcre2test_libedit" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBEDIT /**/" >>confdefs.h + + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + +printf "%s\n" "#define SUPPORT_LIBREADLINE /**/" >>confdefs.h + +fi + + +printf "%s\n" "#define NEWLINE_DEFAULT $ac_pcre2_newline_value" >>confdefs.h + + +if test "$enable_bsr_anycrlf" = "yes"; then + +printf "%s\n" "#define BSR_ANYCRLF /**/" >>confdefs.h + +fi + +if test "$enable_never_backslash_C" = "yes"; then + +printf "%s\n" "#define NEVER_BACKSLASH_C /**/" >>confdefs.h + +fi + + +printf "%s\n" "#define LINK_SIZE $with_link_size" >>confdefs.h + + + +printf "%s\n" "#define MAX_VARLOOKBEHIND $with_max_varlookbehind" >>confdefs.h + + + +printf "%s\n" "#define PARENS_NEST_LIMIT $with_parens_nest_limit" >>confdefs.h + + + +printf "%s\n" "#define MATCH_LIMIT $with_match_limit" >>confdefs.h + + +# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth + +if test "$with_match_limit_recursion" != "UNSET"; then +cat <>confdefs.h + + + +printf "%s\n" "#define HEAP_LIMIT $with_heap_limit" >>confdefs.h + + + +printf "%s\n" "#define MAX_NAME_SIZE 128" >>confdefs.h + + + +printf "%s\n" "#define MAX_NAME_COUNT 10000" >>confdefs.h + + + + +if test "$enable_ebcdic" = "yes"; then + +printf "%s\n" "#define EBCDIC /**/" >>confdefs.h + +fi + +if test "$enable_ebcdic_nl25" = "yes"; then + +printf "%s\n" "#define EBCDIC_NL25 /**/" >>confdefs.h + +fi + +if test "$enable_valgrind" = 
"yes"; then + +printf "%s\n" "#define SUPPORT_VALGRIND /**/" >>confdefs.h + +fi + +# Platform specific issues +NO_UNDEFINED= +EXPORT_ALL_SYMBOLS= +case $host_os in + cygwin* | mingw* ) + if test X"$enable_shared" = Xyes; then + NO_UNDEFINED="-no-undefined" + EXPORT_ALL_SYMBOLS="-Wl,--export-all-symbols" + fi + ;; +esac + +# The extra LDFLAGS for each particular library. The libpcre2*_version values +# are m4 variables, assigned above. + +EXTRA_LIBPCRE2_8_LDFLAGS="$EXTRA_LIBPCRE2_8_LDFLAGS \ + $NO_UNDEFINED -version-info 14:0:14" + +EXTRA_LIBPCRE2_16_LDFLAGS="$EXTRA_LIBPCRE2_16_LDFLAGS \ + $NO_UNDEFINED -version-info 14:0:14" + +EXTRA_LIBPCRE2_32_LDFLAGS="$EXTRA_LIBPCRE2_32_LDFLAGS \ + $NO_UNDEFINED -version-info 14:0:14" + +EXTRA_LIBPCRE2_POSIX_LDFLAGS="$EXTRA_LIBPCRE2_POSIX_LDFLAGS \ + $NO_UNDEFINED -version-info 3:6:0" + + + + + + +# When we run 'make distcheck', use these arguments. Turning off compiler +# optimization makes it run faster. +DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit" + + +# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is +# specified, the relevant library is available. 
+ +if test "$enable_pcre2grep_libz" = "yes"; then + if test "$HAVE_ZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because zlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBZ" != "1"; then + echo "** Cannot --enable-pcre2grep-libz because libz was not found" + exit 1 + fi + LIBZ="-lz" +fi + + +if test "$enable_pcre2grep_libbz2" = "yes"; then + if test "$HAVE_BZLIB_H" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because bzlib.h was not found" + exit 1 + fi + if test "$HAVE_LIBBZ2" != "1"; then + echo "** Cannot --enable-pcre2grep-libbz2 because libbz2 was not found" + exit 1 + fi + LIBBZ2="-lbz2" +fi + + +# Similarly for --enable-pcre2test-readline + +if test "$enable_pcre2test_libedit" = "yes"; then + if test "$enable_pcre2test_libreadline" = "yes"; then + echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline" + exit 1 + fi + if test -z "$HAVE_LIBEDIT_HEADER"; then + echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h," + echo "** edit/readline/readline.h nor a compatible header was found." + exit 1 + fi + if test -z "$LIBEDIT"; then + echo "** Cannot --enable-pcre2test-libedit because libedit library was not found." + exit 1 + fi +fi + +if test "$enable_pcre2test_libreadline" = "yes"; then + if test "$HAVE_READLINE_H" != "1"; then + echo "** Cannot --enable-pcre2test-readline because readline/readline.h was not found." + exit 1 + fi + if test "$HAVE_HISTORY_H" != "1"; then + echo "** Cannot --enable-pcre2test-readline because readline/history.h was not found." + exit 1 + fi + if test -z "$LIBREADLINE"; then + echo "** Cannot --enable-pcre2test-readline because readline library was not found." 
+ exit 1 + fi +fi + +# Handle valgrind support + +if test "$enable_valgrind" = "yes"; then + + + + + + + +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. +set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +PKG_CONFIG=$ac_cv_path_PKG_CONFIG +if test -n "$PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5 +printf "%s\n" "$PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + +fi +if test -z "$ac_cv_path_PKG_CONFIG"; then + ac_pt_PKG_CONFIG=$PKG_CONFIG + # Extract the first word of "pkg-config", so it can be a program name with args. +set dummy pkg-config; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_path_ac_pt_PKG_CONFIG+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $ac_pt_PKG_CONFIG in + [\\/]* | ?:[\\/]*) + ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ac_pt_PKG_CONFIG="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + ;; +esac +fi +ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG +if test -n "$ac_pt_PKG_CONFIG"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5 +printf "%s\n" "$ac_pt_PKG_CONFIG" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + if test "x$ac_pt_PKG_CONFIG" = x; then + PKG_CONFIG="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + PKG_CONFIG=$ac_pt_PKG_CONFIG + fi +else + PKG_CONFIG="$ac_cv_path_PKG_CONFIG" +fi + +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=0.9.0 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5 +printf %s "checking pkg-config is at least version $_pkg_min_version... 
" >&6; } + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + PKG_CONFIG="" + fi +fi + +pkg_failed=no +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for valgrind" >&5 +printf %s "checking for valgrind... " >&6; } + +if test -n "$VALGRIND_CFLAGS"; then + pkg_cv_VALGRIND_CFLAGS="$VALGRIND_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"valgrind\""; } >&5 + ($PKG_CONFIG --exists --print-errors "valgrind") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_VALGRIND_CFLAGS=`$PKG_CONFIG --cflags "valgrind" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$VALGRIND_LIBS"; then + pkg_cv_VALGRIND_LIBS="$VALGRIND_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"valgrind\""; } >&5 + ($PKG_CONFIG --exists --print-errors "valgrind") 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_VALGRIND_LIBS=`$PKG_CONFIG --libs "valgrind" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + VALGRIND_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "valgrind" 2>&1` + else + VALGRIND_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "valgrind" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$VALGRIND_PKG_ERRORS" >&5 + + as_fn_error $? "Package requirements (valgrind) were not met: + +$VALGRIND_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +Alternatively, you may set the environment variables VALGRIND_CFLAGS +and VALGRIND_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details." "$LINENO" 5 +elif test $pkg_failed = untried; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +Alternatively, you may set the environment variables VALGRIND_CFLAGS +and VALGRIND_LIBS to avoid the need to call pkg-config. +See the pkg-config man page for more details. + +To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; } +else + VALGRIND_CFLAGS=$pkg_cv_VALGRIND_CFLAGS + VALGRIND_LIBS=$pkg_cv_VALGRIND_LIBS + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + +fi +fi + +# Handle code coverage reporting support +if test "$enable_coverage" = "yes"; then + if test "x$GCC" != "xyes"; then + as_fn_error $? "Code coverage reports can only be generated when using GCC" "$LINENO" 5 + fi + + # ccache is incompatible with gcov + # Extract the first word of "shtool", so it can be a program name with args. +set dummy shtool; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_SHTOOL+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $SHTOOL in + [\\/]* | ?:[\\/]*) + ac_cv_path_SHTOOL="$SHTOOL" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_SHTOOL="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_SHTOOL" && ac_cv_path_SHTOOL="false" + ;; +esac +fi +SHTOOL=$ac_cv_path_SHTOOL +if test -n "$SHTOOL"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $SHTOOL" >&5 +printf "%s\n" "$SHTOOL" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + case `$SHTOOL path $CC` in + *ccache*) cc_ccache=yes;; + *) cc_ccache=no;; + esac + + if test "$cc_ccache" = "yes"; then + if test -z "$CCACHE_DISABLE" -o "$CCACHE_DISABLE" != "1"; then + as_fn_error $? 
"must export CCACHE_DISABLE=1 to disable ccache for code coverage" "$LINENO" 5 + fi + fi + + + # Extract the first word of "lcov", so it can be a program name with args. +set dummy lcov; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_LCOV+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $LCOV in + [\\/]* | ?:[\\/]*) + ac_cv_path_LCOV="$LCOV" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_LCOV="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_LCOV" && ac_cv_path_LCOV="false" + ;; +esac +fi +LCOV=$ac_cv_path_LCOV +if test -n "$LCOV"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LCOV" >&5 +printf "%s\n" "$LCOV" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$LCOV" = "xfalse"; then + as_fn_error $? "lcov not found" "$LINENO" 5 + fi + + + # Extract the first word of "genhtml", so it can be a program name with args. +set dummy genhtml; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_GENHTML+y} +then : + printf %s "(cached) " >&6 +else $as_nop + case $GENHTML in + [\\/]* | ?:[\\/]*) + ac_cv_path_GENHTML="$GENHTML" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_GENHTML="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_GENHTML" && ac_cv_path_GENHTML="false" + ;; +esac +fi +GENHTML=$ac_cv_path_GENHTML +if test -n "$GENHTML"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $GENHTML" >&5 +printf "%s\n" "$GENHTML" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$GENHTML" = "xfalse"; then + as_fn_error $? "genhtml not found" "$LINENO" 5 + fi + + # Set flags needed for gcov + GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage" + GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage" + GCOV_LIBS="-lgcov" + + + +fi # enable_coverage + + if test "x$enable_coverage" = "xyes"; then + WITH_GCOV_TRUE= + WITH_GCOV_FALSE='#' +else + WITH_GCOV_TRUE='#' + WITH_GCOV_FALSE= +fi + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether Intel CET is enabled" >&5 +printf %s "checking whether Intel CET is enabled... " >&6; } +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ +#ifndef __CET__ +# error CET is not enabled +#endif + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + pcre2_cc_cv_intel_cet_enabled=yes +else $as_nop + pcre2_cc_cv_intel_cet_enabled=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pcre2_cc_cv_intel_cet_enabled" >&5 +printf "%s\n" "$pcre2_cc_cv_intel_cet_enabled" >&6; } +if test "$pcre2_cc_cv_intel_cet_enabled" = yes; then + CET_CFLAGS="-mshstk" + +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# LIB_POSTFIX is used by CMakeLists.txt for Windows debug builds. +# Pass empty LIB_POSTFIX to *.pc files and pcre2-config here. + + +# Produce these files, in addition to config.h. + +ac_config_files="$ac_config_files Makefile libpcre2-8.pc libpcre2-16.pc libpcre2-32.pc libpcre2-posix.pc pcre2-config src/pcre2.h" + + +# Make the generated script files executable. +ac_config_commands="$ac_config_commands script-chmod" + + +# Make sure that pcre2_chartables.c is removed in case the method for +# creating it was changed by reconfiguration. +ac_config_commands="$ac_config_commands delete-old-chartables" + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. 
+# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking that generated files are newer than configure" >&5 +printf %s "checking that generated files are newer than configure... " >&6; } + if test -n "$am_sleep_pid"; then + # Hide warnings about reused PIDs. + wait $am_sleep_pid 2>/dev/null + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: done" >&5 +printf "%s\n" "done" >&6; } + if test -n "$EXEEXT"; then + am__EXEEXT_TRUE= + am__EXEEXT_FALSE='#' +else + am__EXEEXT_TRUE='#' + am__EXEEXT_FALSE= +fi + +if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then + as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined. +Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 +fi +if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then + as_fn_error $? "conditional \"AMDEP\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then + as_fn_error $? "conditional \"am__fastdepCC\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_PCRE2_8_TRUE}" && test -z "${WITH_PCRE2_8_FALSE}"; then + as_fn_error $? "conditional \"WITH_PCRE2_8\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_PCRE2_16_TRUE}" && test -z "${WITH_PCRE2_16_FALSE}"; then + as_fn_error $? "conditional \"WITH_PCRE2_16\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_PCRE2_32_TRUE}" && test -z "${WITH_PCRE2_32_FALSE}"; then + as_fn_error $? "conditional \"WITH_PCRE2_32\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_REBUILD_CHARTABLES_TRUE}" && test -z "${WITH_REBUILD_CHARTABLES_FALSE}"; then + as_fn_error $? "conditional \"WITH_REBUILD_CHARTABLES\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_JIT_TRUE}" && test -z "${WITH_JIT_FALSE}"; then + as_fn_error $? "conditional \"WITH_JIT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_UNICODE_TRUE}" && test -z "${WITH_UNICODE_FALSE}"; then + as_fn_error $? "conditional \"WITH_UNICODE\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_VALGRIND_TRUE}" && test -z "${WITH_VALGRIND_FALSE}"; then + as_fn_error $? "conditional \"WITH_VALGRIND\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_DIFF_FUZZ_SUPPORT_TRUE}" && test -z "${WITH_DIFF_FUZZ_SUPPORT_FALSE}"; then + as_fn_error $? "conditional \"WITH_DIFF_FUZZ_SUPPORT\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${WITH_GCOV_TRUE}" && test -z "${WITH_GCOV_FALSE}"; then + as_fn_error $? "conditional \"WITH_GCOV\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. 
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else $as_nop + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. +as_nl=' +' +export as_nl +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi +if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi + +# The user is always right. +if ${PATH_SEPARATOR+false} :; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# Find who we are. Look in the path if we contain no directory separator.
+as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + printf "%s\n" "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. 
Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else $as_nop + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else $as_nop + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. 
+ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 
2>/dev/null || +printf "%s\n" X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. ## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by PCRE2 $as_me 10.45, which was +generated by GNU Autoconf 2.71. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to the package provider." 
+ +_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config='$ac_cs_config_escaped' +ac_cs_version="\\ +PCRE2 config.status 10.45 +configured by $0, generated by GNU Autoconf 2.71, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2021 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +MKDIR_P='$MKDIR_P' +AWK='$AWK' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + printf "%s\n" "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + printf "%s\n" "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? 
"missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? "ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + printf "%s\n" "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + printf "%s\n" "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# +# INIT-COMMANDS +# +AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}" + + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' +macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' +AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`' +DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' +OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' +enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' +enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' +pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' +enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' +shared_archive_member_spec='`$ECHO "$shared_archive_member_spec" | $SED "$delay_single_quote_subst"`' +SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' +ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' +PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' +host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' +host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' +host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' +build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' +build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' +build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' +SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' +Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' +GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' +EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' +FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' +LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' +NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' +LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' 
+max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' +ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' +exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' +lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' +lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' +lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' +lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' +lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' +reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' +reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' +FILECMD='`$ECHO "$FILECMD" | $SED "$delay_single_quote_subst"`' +deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' +file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' +file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' +want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' +sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' +AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' +lt_ar_flags='`$ECHO "$lt_ar_flags" | $SED "$delay_single_quote_subst"`' +AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' +archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' +STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' +RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' +old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' +old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' +lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED 
"$delay_single_quote_subst"`' +CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' +CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' +compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' +GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_import='`$ECHO "$lt_cv_sys_global_symbol_to_import" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' +lt_cv_nm_interface='`$ECHO "$lt_cv_nm_interface" | $SED "$delay_single_quote_subst"`' +nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' +lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' +lt_cv_truncate_bin='`$ECHO "$lt_cv_truncate_bin" | $SED "$delay_single_quote_subst"`' +objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' +MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' +lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' +lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' +need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' +MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' 
+DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' +NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' +LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' +OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' +OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' +libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' +shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' +extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' +enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' +export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' +whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' +compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' +old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' +old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' +archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' +archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' +module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' +module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' +with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' +allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' +no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' +hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED 
"$delay_single_quote_subst"`' +hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' +hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' +hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' +hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' +hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' +inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' +link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' +always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' +export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' +exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' +include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' +prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' +postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' +file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' +variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' +need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' +need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' +version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' +runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' +shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED "$delay_single_quote_subst"`' +libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' +library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' +soname_spec='`$ECHO "$soname_spec" | 
$SED "$delay_single_quote_subst"`' +install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' +postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' +postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' +finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' +finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' +hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' +sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' +configure_time_dlsearch_path='`$ECHO "$configure_time_dlsearch_path" | $SED "$delay_single_quote_subst"`' +configure_time_lt_sys_library_path='`$ECHO "$configure_time_lt_sys_library_path" | $SED "$delay_single_quote_subst"`' +hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' +enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' +enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' +old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' +striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' + +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + +# Quote evaled strings. 
+for var in AS \ +DLLTOOL \ +OBJDUMP \ +SHELL \ +ECHO \ +PATH_SEPARATOR \ +SED \ +GREP \ +EGREP \ +FGREP \ +LD \ +NM \ +LN_S \ +lt_SP2NL \ +lt_NL2SP \ +reload_flag \ +FILECMD \ +deplibs_check_method \ +file_magic_cmd \ +file_magic_glob \ +want_nocaseglob \ +sharedlib_from_linklib_cmd \ +AR \ +archiver_list_spec \ +STRIP \ +RANLIB \ +CC \ +CFLAGS \ +compiler \ +lt_cv_sys_global_symbol_pipe \ +lt_cv_sys_global_symbol_to_cdecl \ +lt_cv_sys_global_symbol_to_import \ +lt_cv_sys_global_symbol_to_c_name_address \ +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ +lt_cv_nm_interface \ +nm_file_list_spec \ +lt_cv_truncate_bin \ +lt_prog_compiler_no_builtin_flag \ +lt_prog_compiler_pic \ +lt_prog_compiler_wl \ +lt_prog_compiler_static \ +lt_cv_prog_compiler_c_o \ +need_locks \ +MANIFEST_TOOL \ +DSYMUTIL \ +NMEDIT \ +LIPO \ +OTOOL \ +OTOOL64 \ +shrext_cmds \ +export_dynamic_flag_spec \ +whole_archive_flag_spec \ +compiler_needs_object \ +with_gnu_ld \ +allow_undefined_flag \ +no_undefined_flag \ +hardcode_libdir_flag_spec \ +hardcode_libdir_separator \ +exclude_expsyms \ +include_expsyms \ +file_list_spec \ +variables_saved_for_relink \ +libname_spec \ +library_names_spec \ +soname_spec \ +install_override_mode \ +finish_eval \ +old_striplib \ +striplib; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in reload_cmds \ +old_postinstall_cmds \ +old_postuninstall_cmds \ +old_archive_cmds \ +extract_expsyms_cmds \ +old_archive_from_new_cmds \ +old_archive_from_expsyms_cmds \ +archive_cmds \ +archive_expsym_cmds \ +module_cmds \ +module_expsym_cmds \ +export_symbols_cmds \ +prelink_cmds \ +postlink_cmds \ +postinstall_cmds \ +postuninstall_cmds \ +finish_cmds \ +sys_lib_search_path_spec \ +configure_time_dlsearch_path \ +configure_time_lt_sys_library_path; do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[\\\\\\\`\\"\\\$]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +ac_aux_dir='$ac_aux_dir' + +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + + + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile' + + + + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. 
+for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "src/config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/config.h" ;; + "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + "libpcre2-8.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-8.pc" ;; + "libpcre2-16.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-16.pc" ;; + "libpcre2-32.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-32.pc" ;; + "libpcre2-posix.pc") CONFIG_FILES="$CONFIG_FILES libpcre2-posix.pc" ;; + "pcre2-config") CONFIG_FILES="$CONFIG_FILES pcre2-config" ;; + "src/pcre2.h") CONFIG_FILES="$CONFIG_FILES src/pcre2.h" ;; + "script-chmod") CONFIG_COMMANDS="$CONFIG_COMMANDS script-chmod" ;; + "delete-old-chartables") CONFIG_COMMANDS="$CONFIG_COMMANDS delete-old-chartables" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files + test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers + test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! 
-d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). 
+if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS" +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`printf "%s\n" "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +printf "%s\n" X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac + ac_MKDIR_P=$MKDIR_P + case $MKDIR_P in + [\\/$]* | ?:[\\/]* ) ;; + */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +s&@MKDIR_P@&$ac_MKDIR_P&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +printf "%s\n" "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + printf "%s\n" "/* $configure_input */" >&1 \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi +# Compute "$ac_file"'s index in $config_headers. 
+_am_arg="$ac_file" +_am_stamp_count=1 +for _am_header in $config_headers :; do + case $_am_header in + $_am_arg | $_am_arg:* ) + break ;; + * ) + _am_stamp_count=`expr $_am_stamp_count + 1` ;; + esac +done +echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || +$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$_am_arg" : 'X\(//\)[^/]' \| \ + X"$_am_arg" : 'X\(//\)$' \| \ + X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$_am_arg" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'`/stamp-h$_am_stamp_count + ;; + + :C) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 +printf "%s\n" "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "depfiles":C) test x"$AMDEP_TRUE" != x"" || { + # Older Autoconf quotes --file arguments for eval, but not when files + # are listed without --file. Let's play safe and only enable the eval + # if we detect the quoting. + # TODO: see whether this extra hack can be removed once we start + # requiring Autoconf 2.70 or later. + case $CONFIG_FILES in #( + *\'*) : + eval set x "$CONFIG_FILES" ;; #( + *) : + set x $CONFIG_FILES ;; #( + *) : + ;; +esac + shift + # Used to flag and report bootstrapping failures. + am_rc=0 + for am_mf + do + # Strip MF so we end up with the name of the file. + am_mf=`printf "%s\n" "$am_mf" | sed -e 's/:.*$//'` + # Check whether this is an Automake generated Makefile which includes + # dependency-tracking related rules and includes. + # Grep'ing the whole file directly is not great: AIX grep has a line + # limit of 2048, but all sed's we know have understand at least 4000. 
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \ + || continue + am_dirpart=`$as_dirname -- "$am_mf" || +$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$am_mf" : 'X\(//\)[^/]' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X"$am_mf" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + am_filepart=`$as_basename -- "$am_mf" || +$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \ + X"$am_mf" : 'X\(//\)$' \| \ + X"$am_mf" : 'X\(/\)' \| . 2>/dev/null || +printf "%s\n" X/"$am_mf" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { echo "$as_me:$LINENO: cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles" >&5 + (cd "$am_dirpart" \ + && sed -e '/# am--include-marker/d' "$am_filepart" \ + | $MAKE -f - am--depfiles) >&5 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } || am_rc=$? + done + if test $am_rc -ne 0; then + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "Something went wrong bootstrapping makefile fragments + for automatic dependency tracking. If GNU make was not used, consider + re-running the configure script with MAKE=\"gmake\" (or whatever is + necessary). You can also try re-running configure with the + '--disable-dependency-tracking' option to at least be able to build + the package (albeit without support for automatic dependency tracking). 
+See \`config.log' for more details" "$LINENO" 5; } + fi + { am_dirpart=; unset am_dirpart;} + { am_filepart=; unset am_filepart;} + { am_mf=; unset am_mf;} + { am_rc=; unset am_rc;} + rm -f conftest-deps.mk +} + ;; + "libtool":C) + + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. + if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +# Copyright (C) 2014 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# The names of the tagged configurations supported by this script. 
+available_tags='' + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG + +# Which release of libtool.m4 was used? +macro_version=$macro_version +macro_revision=$macro_revision + +# Assembler program. +AS=$lt_AS + +# DLL creation program. +DLLTOOL=$lt_DLLTOOL + +# Object dumper program. +OBJDUMP=$lt_OBJDUMP + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# What type of objects to build. +pic_mode=$pic_mode + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# Shared archive member basename,for filename based shared library versioning on AIX. +shared_archive_member_spec=$shared_archive_member_spec + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# An echo program that protects backslashes. +ECHO=$lt_ECHO + +# The PATH separator for the build system. +PATH_SEPARATOR=$lt_PATH_SEPARATOR + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="\$SED -e 1s/^X//" + +# A grep program that handles long lines. +GREP=$lt_GREP + +# An ERE matcher. +EGREP=$lt_EGREP + +# A literal string matcher. +FGREP=$lt_FGREP + +# A BSD- or MS-compatible name lister. +NM=$lt_NM + +# Whether we need soft or hard links. +LN_S=$lt_LN_S + +# What is the maximum length of a command? +max_cmd_len=$max_cmd_len + +# Object file suffix (normally "o"). +objext=$ac_objext + +# Executable file suffix (normally ""). +exeext=$exeext + +# whether the shell understands "unset". +lt_unset=$lt_unset + +# turn spaces into newlines. +SP2NL=$lt_lt_SP2NL + +# turn newlines into spaces. 
+NL2SP=$lt_lt_NL2SP + +# convert \$build file names to \$host format. +to_host_file_cmd=$lt_cv_to_host_file_cmd + +# convert \$build files to toolchain format. +to_tool_file_cmd=$lt_cv_to_tool_file_cmd + +# A file(cmd) program that detects file types. +FILECMD=$lt_FILECMD + +# Method to check whether dependent libraries are shared objects. +deplibs_check_method=$lt_deplibs_check_method + +# Command to use when deplibs_check_method = "file_magic". +file_magic_cmd=$lt_file_magic_cmd + +# How to find potential files when deplibs_check_method = "file_magic". +file_magic_glob=$lt_file_magic_glob + +# Find potential files using nocaseglob when deplibs_check_method = "file_magic". +want_nocaseglob=$lt_want_nocaseglob + +# Command to associate shared and link libraries. +sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd + +# The archiver. +AR=$lt_AR + +# Flags to create an archive (by configure). +lt_ar_flags=$lt_ar_flags + +# Flags to create an archive. +AR_FLAGS=\${ARFLAGS-"\$lt_ar_flags"} + +# How to feed a file listing to the archiver. +archiver_list_spec=$lt_archiver_list_spec + +# A symbol stripping program. +STRIP=$lt_STRIP + +# Commands used to install an old-style archive. +RANLIB=$lt_RANLIB +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Whether to use a lock for old archive extraction. +lock_old_archive_extraction=$lock_old_archive_extraction + +# A C compiler. +LTCC=$lt_CC + +# LTCC compiler flags. +LTCFLAGS=$lt_CFLAGS + +# Take the output of nm and produce a listing of raw symbols and C names. +global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe + +# Transform the output of nm in a proper C declaration. +global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl + +# Transform the output of nm into a list of symbols to manually relocate. +global_symbol_to_import=$lt_lt_cv_sys_global_symbol_to_import + +# Transform the output of nm in a C name address pair. 
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address + +# Transform the output of nm in a C name address pair when lib prefix is needed. +global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix + +# The name lister interface. +nm_interface=$lt_lt_cv_nm_interface + +# Specify filename containing input files for \$NM. +nm_file_list_spec=$lt_nm_file_list_spec + +# The root where to search for dependent libraries,and where our libraries should be installed. +lt_sysroot=$lt_sysroot + +# Command to truncate a binary pipe. +lt_truncate_bin=$lt_lt_cv_truncate_bin + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# Used to examine libraries when file_magic_cmd begins with "file". +MAGIC_CMD=$MAGIC_CMD + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Manifest tool. +MANIFEST_TOOL=$lt_MANIFEST_TOOL + +# Tool to manipulate archived DWARF debug symbol files on Mac OS X. +DSYMUTIL=$lt_DSYMUTIL + +# Tool to change global to local symbols on Mac OS X. +NMEDIT=$lt_NMEDIT + +# Tool to manipulate fat objects and archives on Mac OS X. +LIPO=$lt_LIPO + +# ldd/readelf like tool for Mach-O binaries on Mac OS X. +OTOOL=$lt_OTOOL + +# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. +OTOOL64=$lt_OTOOL64 + +# Old archive suffix (normally "a"). +libext=$libext + +# Shared library suffix (normally ".so"). +shrext_cmds=$lt_shrext_cmds + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at link time. +variables_saved_for_relink=$lt_variables_saved_for_relink + +# Do we need the "lib" prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Library versioning type. 
+version_type=$version_type + +# Shared library runtime path variable. +runpath_var=$runpath_var + +# Shared library path variable. +shlibpath_var=$shlibpath_var + +# Is shlibpath searched before the hard-coded library search path? +shlibpath_overrides_runpath=$shlibpath_overrides_runpath + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Permission mode override for installation of shared libraries. +install_override_mode=$lt_install_override_mode + +# Command to use after installation of a shared archive. +postinstall_cmds=$lt_postinstall_cmds + +# Command to use after uninstallation of a shared archive. +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to finish a libtool library installation in a directory. +finish_cmds=$lt_finish_cmds + +# As "finish_cmds", except a single script fragment to be evaled but +# not shown. +finish_eval=$lt_finish_eval + +# Whether we should hardcode library paths into libraries. +hardcode_into_libs=$hardcode_into_libs + +# Compile-time system search path for libraries. +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Detected run-time system search path for libraries. +sys_lib_dlsearch_path_spec=$lt_configure_time_dlsearch_path + +# Explicit LT_SYS_LIBRARY_PATH set during ./configure time. +configure_time_lt_sys_library_path=$lt_configure_time_lt_sys_library_path + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Commands to strip libraries. 
+old_striplib=$lt_old_striplib +striplib=$lt_striplib + + +# The linker used to build libraries. +LD=$lt_LD + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# Commands used to build an old-style archive. +old_archive_cmds=$lt_old_archive_cmds + +# A language specific compiler. +CC=$lt_compiler + +# Is the compiler the GNU compiler? +with_gcc=$GCC + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag + +# Additional compiler flags for building library objects. +pic_flag=$lt_lt_prog_compiler_pic + +# How to pass a linker flag through the compiler. +wl=$lt_lt_prog_compiler_wl + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_lt_prog_compiler_static + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_lt_cv_prog_compiler_c_o + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$archive_cmds_need_lc + +# Whether or not to disallow shared libs when runtime libs are static. +allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_export_dynamic_flag_spec + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_whole_archive_flag_spec + +# Whether the compiler copes with passing no objects directly. +compiler_needs_object=$lt_compiler_needs_object + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_old_archive_from_new_cmds + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds + +# Commands used to build a shared archive. +archive_cmds=$lt_archive_cmds +archive_expsym_cmds=$lt_archive_expsym_cmds + +# Commands used to build a loadable module if different from building +# a shared archive. 
+module_cmds=$lt_module_cmds +module_expsym_cmds=$lt_module_expsym_cmds + +# Whether we are building with GNU ld or not. +with_gnu_ld=$lt_with_gnu_ld + +# Flag that allows shared libraries with undefined symbols to be built. +allow_undefined_flag=$lt_allow_undefined_flag + +# Flag that enforces no undefined symbols. +no_undefined_flag=$lt_no_undefined_flag + +# Flag to hardcode \$libdir into a binary during linking. +# This must work even if \$libdir does not exist +hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec + +# Whether we need a single "-rpath" flag with a separated argument. +hardcode_libdir_separator=$lt_hardcode_libdir_separator + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary. +hardcode_direct=$hardcode_direct + +# Set to "yes" if using DIR/libNAME\$shared_ext during linking hardcodes +# DIR into the resulting binary and the resulting library dependency is +# "absolute",i.e impossible to change by setting \$shlibpath_var if the +# library is relocated. +hardcode_direct_absolute=$hardcode_direct_absolute + +# Set to "yes" if using the -LDIR flag during linking hardcodes DIR +# into the resulting binary. +hardcode_minus_L=$hardcode_minus_L + +# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR +# into the resulting binary. +hardcode_shlibpath_var=$hardcode_shlibpath_var + +# Set to "yes" if building a shared library automatically hardcodes DIR +# into the library and all subsequent libraries and executables linked +# against it. +hardcode_automatic=$hardcode_automatic + +# Set to yes if linker adds runtime paths of dependent libraries +# to runtime path list. +inherit_rpath=$inherit_rpath + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$link_all_deplibs + +# Set to "yes" if exported symbols are required. +always_export_symbols=$always_export_symbols + +# The commands to list exported symbols. 
+export_symbols_cmds=$lt_export_symbols_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_exclude_expsyms + +# Symbols that must always be exported. +include_expsyms=$lt_include_expsyms + +# Commands necessary for linking programs (against libraries) with templates. +prelink_cmds=$lt_prelink_cmds + +# Commands necessary for finishing linking programs. +postlink_cmds=$lt_postlink_cmds + +# Specify filename containing input files. +file_list_spec=$lt_file_list_spec + +# How to hardcode a shared library path into an executable. +hardcode_action=$hardcode_action + +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x$2 in + x) + ;; + *:) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'` \$$1\" + ;; + x:*) + eval $1=\"\$$1 `$ECHO $2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval $1=\"\$$1\ `$ECHO $2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval $1=\"`$ECHO $2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \$$1\" + ;; + *) + eval $1=\"`$ECHO $2 | $SED 's/:/ /g'`\" + ;; + esac +} + + +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
+func_cc_basename () +{ + for cc_temp in $*""; do + case $cc_temp in + compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; + distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} + + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + + +ltmain=$ac_aux_dir/ltmain.sh + + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + $SED '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" + + ;; + "script-chmod":C) chmod a+x pcre2-config ;; + "delete-old-chartables":C) rm -f pcre2_chartables.c ;; + + esac +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. 
When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + + +# --disable-stack-for-recursion is obsolete and has no effect. + +if test "$enable_stack_for_recursion" = "no"; then +cat < + #endif + #include + + int a, b; + size_t m; + ]], [[__builtin_mul_overflow(a, b, &m)]])], + [pcre2_cc_cv_builtin_mul_overflow=yes], + [pcre2_cc_cv_builtin_mul_overflow=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow]) +if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then + AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1, + [Define this if your compiler provides __builtin_mul_overflow()]) +fi +AC_LANG_POP([C]) + +# Check for the unreachable() builtin + +AC_MSG_CHECKING([for __builtin_unreachable()]) +AC_LANG_PUSH([C]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[int r;]], [[if (r) __builtin_unreachable()]])], + [pcre2_cc_cv_builtin_unreachable=yes], + [pcre2_cc_cv_builtin_unreachable=no]) +AC_MSG_RESULT([$pcre2_cc_cv_builtin_unreachable]) +if test "$pcre2_cc_cv_builtin_unreachable" = yes; then + AC_DEFINE([HAVE_BUILTIN_UNREACHABLE], 1, + [Define this if your compiler provides __builtin_unreachable()]) +fi +AC_LANG_POP([C]) + +# Versioning + +PCRE2_MAJOR="pcre2_major" +PCRE2_MINOR="pcre2_minor" +PCRE2_PRERELEASE="pcre2_prerelease" +PCRE2_DATE="pcre2_date" + +if test 
"$PCRE2_MINOR" = "08" -o "$PCRE2_MINOR" = "09" +then + echo "***" + echo "*** Minor version number $PCRE2_MINOR must not be used. ***" + echo "*** Use only 00 to 07 or 10 onwards, to avoid octal issues. ***" + echo "***" + exit 1 +fi + +AC_SUBST(PCRE2_MAJOR) +AC_SUBST(PCRE2_MINOR) +AC_SUBST(PCRE2_PRERELEASE) +AC_SUBST(PCRE2_DATE) + +# Set a more sensible default value for $(htmldir). +if test "x$htmldir" = 'x${docdir}' +then + htmldir='${docdir}/html' +fi + +# Force an error for PCRE1 size options +AC_ARG_ENABLE(pcre8,,,enable_pcre8=no) +AC_ARG_ENABLE(pcre16,,,enable_pcre16=no) +AC_ARG_ENABLE(pcre32,,,enable_pcre32=no) + +if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" +then + echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]" + exit 1 +fi + +# Handle --disable-pcre2-8 (enabled by default) +AC_ARG_ENABLE(pcre2-8, + AS_HELP_STRING([--disable-pcre2-8], + [disable 8 bit character support]), + , enable_pcre2_8=unset) +AC_SUBST(enable_pcre2_8) + +# Handle --enable-pcre2-16 (disabled by default) +AC_ARG_ENABLE(pcre2-16, + AS_HELP_STRING([--enable-pcre2-16], + [enable 16 bit character support]), + , enable_pcre2_16=unset) +AC_SUBST(enable_pcre2_16) + +# Handle --enable-pcre2-32 (disabled by default) +AC_ARG_ENABLE(pcre2-32, + AS_HELP_STRING([--enable-pcre2-32], + [enable 32 bit character support]), + , enable_pcre2_32=unset) +AC_SUBST(enable_pcre2_32) + +# Handle --enable-debug (disabled by default) +AC_ARG_ENABLE(debug, + AS_HELP_STRING([--enable-debug], + [enable debugging code]), + , enable_debug=no) + +# Handle --enable-jit (disabled by default) +AC_ARG_ENABLE(jit, + AS_HELP_STRING([--enable-jit], + [enable Just-In-Time compiling support]), + , enable_jit=no) + +# This code enables JIT if the hardware supports it. 
+if test "$enable_jit" = "auto"; then + AC_LANG(C) + SAVE_CPPFLAGS=$CPPFLAGS + CPPFLAGS=-I$srcdir + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ + #define SLJIT_CONFIG_AUTO 1 + #include "deps/sljit/sljit_src/sljitConfigCPU.h" + #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + #error unsupported + #endif]])], enable_jit=yes, enable_jit=no) + CPPFLAGS=$SAVE_CPPFLAGS + echo checking for JIT support on this hardware... $enable_jit +fi + +# Handle --enable-jit-sealloc (disabled by default and only experimental) +case $host_os in + linux* | netbsd*) + AC_ARG_ENABLE(jit-sealloc, + AS_HELP_STRING([--enable-jit-sealloc], + [enable SELinux compatible execmem allocator in JIT (experimental)]), + ,enable_jit_sealloc=no) + ;; + *) + enable_jit_sealloc=unsupported + ;; +esac + +# Handle --disable-pcre2grep-jit (enabled by default) +AC_ARG_ENABLE(pcre2grep-jit, + AS_HELP_STRING([--disable-pcre2grep-jit], + [disable JIT support in pcre2grep]), + , enable_pcre2grep_jit=yes) + +# Handle --disable-pcre2grep-callout (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout, + AS_HELP_STRING([--disable-pcre2grep-callout], + [disable callout script support in pcre2grep]), + , enable_pcre2grep_callout=yes) + +# Handle --disable-pcre2grep-callout-fork (enabled by default) +AC_ARG_ENABLE(pcre2grep-callout-fork, + AS_HELP_STRING([--disable-pcre2grep-callout-fork], + [disable callout script fork support in pcre2grep]), + , enable_pcre2grep_callout_fork=yes) + +# Handle --enable-rebuild-chartables +AC_ARG_ENABLE(rebuild-chartables, + AS_HELP_STRING([--enable-rebuild-chartables], + [rebuild character tables in current locale]), + , enable_rebuild_chartables=no) + +# Handle --disable-unicode (enabled by default) +AC_ARG_ENABLE(unicode, + AS_HELP_STRING([--disable-unicode], + [disable Unicode support]), + , enable_unicode=unset) + +# Handle newline options +ac_pcre2_newline=lf +AC_ARG_ENABLE(newline-is-cr, + AS_HELP_STRING([--enable-newline-is-cr], + [use CR as newline character]), + 
ac_pcre2_newline=cr) +AC_ARG_ENABLE(newline-is-lf, + AS_HELP_STRING([--enable-newline-is-lf], + [use LF as newline character (default)]), + ac_pcre2_newline=lf) +AC_ARG_ENABLE(newline-is-crlf, + AS_HELP_STRING([--enable-newline-is-crlf], + [use CRLF as newline sequence]), + ac_pcre2_newline=crlf) +AC_ARG_ENABLE(newline-is-anycrlf, + AS_HELP_STRING([--enable-newline-is-anycrlf], + [use CR, LF, or CRLF as newline sequence]), + ac_pcre2_newline=anycrlf) +AC_ARG_ENABLE(newline-is-any, + AS_HELP_STRING([--enable-newline-is-any], + [use any valid Unicode newline sequence]), + ac_pcre2_newline=any) +AC_ARG_ENABLE(newline-is-nul, + AS_HELP_STRING([--enable-newline-is-nul], + [use NUL (binary zero) as newline character]), + ac_pcre2_newline=nul) +enable_newline="$ac_pcre2_newline" + +# Handle --enable-bsr-anycrlf +AC_ARG_ENABLE(bsr-anycrlf, + AS_HELP_STRING([--enable-bsr-anycrlf], + [\R matches only CR, LF, CRLF by default]), + , enable_bsr_anycrlf=no) + +# Handle --enable-never-backslash-C +AC_ARG_ENABLE(never-backslash-C, + AS_HELP_STRING([--enable-never-backslash-C], + [use of \C causes an error]), + , enable_never_backslash_C=no) + +# Handle --enable-ebcdic +AC_ARG_ENABLE(ebcdic, + AS_HELP_STRING([--enable-ebcdic], + [assume EBCDIC coding rather than ASCII; incompatible with --enable-unicode; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), + , enable_ebcdic=no) + +# Handle --enable-ebcdic-nl25 +AC_ARG_ENABLE(ebcdic-nl25, + AS_HELP_STRING([--enable-ebcdic-nl25], + [set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]), + , enable_ebcdic_nl25=no) + +# Handle --enable-pcre2grep-libz +AC_ARG_ENABLE(pcre2grep-libz, + AS_HELP_STRING([--enable-pcre2grep-libz], + [link pcre2grep with libz to handle .gz files]), + , enable_pcre2grep_libz=no) + +# Handle --enable-pcre2grep-libbz2 +AC_ARG_ENABLE(pcre2grep-libbz2, + AS_HELP_STRING([--enable-pcre2grep-libbz2], + [link pcre2grep with libbz2 to handle .bz2 files]), + , 
enable_pcre2grep_libbz2=no) + +# Handle --with-pcre2grep-bufsize=N +AC_ARG_WITH(pcre2grep-bufsize, + AS_HELP_STRING([--with-pcre2grep-bufsize=N], + [pcre2grep initial buffer size (default=20480, minimum=8192)]), + , with_pcre2grep_bufsize=20480) + +# Handle --with-pcre2grep-max-bufsize=N +AC_ARG_WITH(pcre2grep-max-bufsize, + AS_HELP_STRING([--with-pcre2grep-max-bufsize=N], + [pcre2grep maximum buffer size (default=1048576, minimum=8192)]), + , with_pcre2grep_max_bufsize=1048576) + +# Handle --enable-pcre2test-libedit +AC_ARG_ENABLE(pcre2test-libedit, + AS_HELP_STRING([--enable-pcre2test-libedit], + [link pcre2test with libedit]), + , enable_pcre2test_libedit=no) + +# Handle --enable-pcre2test-libreadline +AC_ARG_ENABLE(pcre2test-libreadline, + AS_HELP_STRING([--enable-pcre2test-libreadline], + [link pcre2test with libreadline]), + , enable_pcre2test_libreadline=no) + +# Handle --with-link-size=N +AC_ARG_WITH(link-size, + AS_HELP_STRING([--with-link-size=N], + [internal link size (2, 3, or 4 allowed; default=2)]), + , with_link_size=2) + +# Handle --with-max-varlookbehind=N +AC_ARG_WITH(max-varlookbehind, + AS_HELP_STRING([--with-max-varlookbehind=N], + [maximum length of variable lookbehind (default=255)]), + , with_max_varlookbehind=255) + +# Handle --with-parens-nest-limit=N +AC_ARG_WITH(parens-nest-limit, + AS_HELP_STRING([--with-parens-nest-limit=N], + [nested parentheses limit (default=250)]), + , with_parens_nest_limit=250) + +# Handle --with-heap-limit +AC_ARG_WITH(heap-limit, + AS_HELP_STRING([--with-heap-limit=N], + [default limit on heap memory (kibibytes, default=20000000)]), + , with_heap_limit=20000000) + +# Handle --with-match-limit=N +AC_ARG_WITH(match-limit, + AS_HELP_STRING([--with-match-limit=N], + [default limit on internal looping (default=10000000)]), + , with_match_limit=10000000) + +# Handle --with-match-limit-depth=N +# Recognize old synonym --with-match-limit-recursion +# +# Note: In config.h, the default is to define MATCH_LIMIT_DEPTH 
symbolically as +# MATCH_LIMIT, which in turn is defined to be some numeric value (e.g. +# 10000000). MATCH_LIMIT_DEPTH can otherwise be set to some different numeric +# value (or even the same numeric value as MATCH_LIMIT, though no longer +# defined in terms of the latter). +# +AC_ARG_WITH(match-limit-depth, + AS_HELP_STRING([--with-match-limit-depth=N], + [default limit on match tree depth (default=MATCH_LIMIT)]), + , with_match_limit_depth=MATCH_LIMIT) + +AC_ARG_WITH(match-limit-recursion,, + , with_match_limit_recursion=UNSET) + +# Handle --enable-valgrind +AC_ARG_ENABLE(valgrind, + AS_HELP_STRING([--enable-valgrind], + [enable valgrind support]), + , enable_valgrind=no) + +# Enable code coverage reports using gcov +AC_ARG_ENABLE(coverage, + AS_HELP_STRING([--enable-coverage], + [enable code coverage reports using gcov]), + , enable_coverage=no) + +# Handle --enable-fuzz-support +AC_ARG_ENABLE(fuzz_support, + AS_HELP_STRING([--enable-fuzz-support], + [enable fuzzer support]), + , enable_fuzz_support=no) + +# Handle --enable-diff-fuzz-support +AC_ARG_ENABLE(diff_fuzz_support, + AS_HELP_STRING([--enable-diff-fuzz-support], + [enable differential fuzzer support]), + , enable_diff_fuzz_support=no) + +# Handle --disable-stack-for-recursion +# This option became obsolete at release 10.30. 
+AC_ARG_ENABLE(stack-for-recursion,, + , enable_stack_for_recursion=yes) + +# Original code +# AC_ARG_ENABLE(stack-for-recursion, +# AS_HELP_STRING([--disable-stack-for-recursion], +# [don't use stack recursion when matching]), +# , enable_stack_for_recursion=yes) + +# Handle --disable-percent_zt (set as "auto" by default) +AC_ARG_ENABLE(percent-zt, + AS_HELP_STRING([--disable-percent-zt], + [disable the use of z and t formatting modifiers]), + , enable_percent_zt=auto) + +# Set the default value for pcre2-8 +if test "x$enable_pcre2_8" = "xunset" +then + enable_pcre2_8=yes +fi + +# Set the default value for pcre2-16 +if test "x$enable_pcre2_16" = "xunset" +then + enable_pcre2_16=no +fi + +# Set the default value for pcre2-32 +if test "x$enable_pcre2_32" = "xunset" +then + enable_pcre2_32=no +fi + +# Make sure at least one library is selected +if test "x$enable_pcre2_8$enable_pcre2_16$enable_pcre2_32" = "xnonono" +then + AC_MSG_ERROR([At least one of the 8, 16 or 32 bit libraries must be enabled]) +fi + +# Unicode is enabled by default. +if test "x$enable_unicode" = "xunset" +then + enable_unicode=yes +fi + +# Convert the newline identifier into the appropriate integer value. These must +# agree with the PCRE2_NEWLINE_xxx values in pcre2.h. + +case "$enable_newline" in + cr) ac_pcre2_newline_value=1 ;; + lf) ac_pcre2_newline_value=2 ;; + crlf) ac_pcre2_newline_value=3 ;; + any) ac_pcre2_newline_value=4 ;; + anycrlf) ac_pcre2_newline_value=5 ;; + nul) ac_pcre2_newline_value=6 ;; + *) + AC_MSG_ERROR([invalid argument "$enable_newline" to --enable-newline option]) + ;; +esac + +# --enable-ebcdic-nl25 implies --enable-ebcdic +if test "x$enable_ebcdic_nl25" = "xyes"; then + enable_ebcdic=yes +fi + +# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. +# Also check that UTF support is not requested, because PCRE2 cannot handle +# EBCDIC and UTF in the same build. To do so it would need to use different +# character constants depending on the mode. 
Also, EBCDIC cannot be used with +# 16-bit and 32-bit libraries. +# +if test "x$enable_ebcdic" = "xyes"; then + enable_rebuild_chartables=yes + if test "x$enable_unicode" = "xyes"; then + AC_MSG_ERROR([support for EBCDIC and Unicode cannot be enabled at the same time]) + fi + if test "x$enable_pcre2_16" = "xyes" -o "x$enable_pcre2_32" = "xyes"; then + AC_MSG_ERROR([EBCDIC support is available only for the 8-bit library]) + fi +fi + +# Check argument to --with-link-size +case "$with_link_size" in + 2|3|4) ;; + *) + AC_MSG_ERROR([invalid argument "$with_link_size" to --with-link-size option]) + ;; +esac + +AH_TOP([ +/* PCRE2 is written in Standard C, but there are a few non-standard things it +can cope with, allowing it to run on SunOS4 and other "close to standard" +systems. + +In environments that support the GNU autotools, config.h.in is converted into +config.h by the "configure" script. In environments that use CMake, +config-cmake.in is converted into config.h. If you are going to build PCRE2 "by +hand" without using "configure" or CMake, you should copy the distributed +config.h.generic to config.h, and edit the macro definitions to be the way you +need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, +so that config.h is included at the start of every source. + +Alternatively, you can avoid editing by using -D on the compiler command line +to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H, +but if you do, default values will be taken from config.h for non-boolean +macros that are not defined on the command line. + +Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be +defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All +such macros are listed as a commented #undef in config.h.generic. Macros such +as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are +surrounded by #ifndef/#endif lines so that the value can be overridden by -D. 
+ +PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if +HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make +sure both macros are undefined; an emulation function will then be used. */]) + +# Checks for header files. +AC_CHECK_HEADERS(assert.h limits.h sys/types.h sys/stat.h dirent.h) +AC_CHECK_HEADERS([windows.h], [HAVE_WINDOWS_H=1]) +AC_CHECK_HEADERS([sys/wait.h], [HAVE_SYS_WAIT_H=1]) + +# Conditional compilation +AM_CONDITIONAL(WITH_PCRE2_8, test "x$enable_pcre2_8" = "xyes") +AM_CONDITIONAL(WITH_PCRE2_16, test "x$enable_pcre2_16" = "xyes") +AM_CONDITIONAL(WITH_PCRE2_32, test "x$enable_pcre2_32" = "xyes") +AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") +AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") +AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes") +AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes") +AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes") +AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes") + +if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then + echo "** ERROR: Fuzzer support requires the 8-bit library" + exit 1 +fi + +if test "$enable_diff_fuzz_support" = "yes"; then + if test "$enable_fuzz_support" = "no"; then + echo "** ERROR: Differential fuzzing support requires fuzzing support" + exit 1 + fi + if test "$enable_jit" = "no"; then + echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support" + exit 1 + fi + AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [ + Define to any value to enable differential fuzzing support.]) +fi + +# Checks for typedefs, structures, and compiler characteristics. + +AC_C_CONST +AC_TYPE_SIZE_T + +# Checks for library functions. 
+ +AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror) +AC_MSG_CHECKING([for realpath]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#include <stdlib.h> +#include <limits.h> +]],[[ +char buffer[PATH_MAX]; +realpath(".", buffer); +]])], +[AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_REALPATH], 1, + [Define to 1 if you have the `realpath' function.]) +], +AC_MSG_RESULT([no])) + +# Check for the availability of libz (aka zlib) + +AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1]) +AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1]) + +# Check for the availability of libbz2. Originally we just used AC_CHECK_LIB, +# as for libz. However, this had the following problem, diagnosed and fixed by +# a user: +# +# - libbz2 uses the Pascal calling convention (WINAPI) for the functions +# under Win32. +# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h", +# therefore missing the function definition. +# - The compiler thus generates a "C" signature for the test function. +# - The linker fails to find the "C" function. +# - PCRE2 fails to configure if asked to do so against libbz2. +# +# Solution: +# +# - Replace the AC_CHECK_LIB test with a custom test.
+ +AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1]) +# Original test +# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) +# +# Custom test follows + +AC_MSG_CHECKING([for libbz2]) +OLD_LIBS="$LIBS" +LIBS="$LIBS -lbz2" +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#ifdef HAVE_BZLIB_H +#include <bzlib.h> +#endif]], +[[return (int)BZ2_bzopen("conftest", "rb");]])], +[AC_MSG_RESULT([yes]);HAVE_LIBBZ2=1], +AC_MSG_RESULT([no])) +LIBS="$OLD_LIBS" + +# Check for the availability of libreadline + +if test "$enable_pcre2test_libreadline" = "yes"; then + AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1]) + AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1]) + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"], + [unset ac_cv_lib_readline_readline; + AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"], + [LIBREADLINE=""], + [-ltermcap])], + [-lncursesw])], + [-lncurses])], + [-lcurses])], + [-ltinfo])]) + AC_SUBST(LIBREADLINE) + if test -n "$LIBREADLINE"; then + if test "$LIBREADLINE" != "-lreadline"; then + echo "-lreadline needs $LIBREADLINE" + LIBREADLINE="-lreadline $LIBREADLINE" + fi + fi +fi + +# Check for the availability of libedit. Different distributions put its +# headers in different places. Try to cover the most common ones.
+ +if test "$enable_pcre2test_libedit" = "yes"; then + AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [ + HAVE_LIBEDIT_HEADER=1 + break + ]) + AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"]) +fi + +PCRE2_STATIC_CFLAG="" +if test "x$enable_shared" = "xno" ; then + AC_DEFINE([PCRE2_STATIC], [1], [ + Define to any value if linking statically (TODO: make nice with Libtool)]) + PCRE2_STATIC_CFLAG="-DPCRE2_STATIC" +fi +AC_SUBST(PCRE2_STATIC_CFLAG) + +PCRE2POSIX_CFLAG="" +if test "x$enable_shared" = "xyes" ; then + PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED" +fi +AC_SUBST(PCRE2POSIX_CFLAG) + +# Here is where PCRE2-specific defines are handled + +if test "$enable_pcre2_8" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_8], [], [ + Define to any value to enable the 8 bit PCRE2 library.]) +fi + +if test "$enable_pcre2_16" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_16], [], [ + Define to any value to enable the 16 bit PCRE2 library.]) +fi + +if test "$enable_pcre2_32" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2_32], [], [ + Define to any value to enable the 32 bit PCRE2 library.]) +fi + +if test "$enable_debug" = "yes"; then + AC_DEFINE([PCRE2_DEBUG], [], [ + Define to any value to include debugging code.]) +fi + +if test "$enable_percent_zt" = "no"; then + AC_DEFINE([DISABLE_PERCENT_ZT], [], [ + Define to any value to disable the use of the z and t modifiers in + formatting settings such as %zu or %td (this is rarely needed).]) +else + enable_percent_zt=auto +fi + +# Unless running under Windows, JIT support requires pthreads. 
+ +if test "$enable_jit" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])]) + CC="$PTHREAD_CC" + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + fi + AC_DEFINE([SUPPORT_JIT], [], [ + Define to any value to enable support for Just-In-Time compiling.]) +else + enable_pcre2grep_jit="no" +fi + +if test "$enable_jit_sealloc" = "yes"; then + AC_DEFINE([SLJIT_PROT_EXECUTABLE_ALLOCATOR], [1], [ + Define to any non-zero number to enable support for SELinux + compatible executable memory allocator in JIT. Note that this + will have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_jit" = "yes"; then + AC_DEFINE([SUPPORT_PCRE2GREP_JIT], [], [ + Define to any value to enable JIT support in pcre2grep. Note that this will + have no effect unless SUPPORT_JIT is also defined.]) +fi + +if test "$enable_pcre2grep_callout" = "yes"; then + if test "$enable_pcre2grep_callout_fork" = "yes"; then + if test "$HAVE_WINDOWS_H" != "1"; then + if test "$HAVE_SYS_WAIT_H" != "1"; then + AC_MSG_ERROR([Callout script support needs sys/wait.h.]) + fi + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [ + Define to any value to enable fork support in pcre2grep callout scripts. + This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also + defined.]) + fi + AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [ + Define to any value to enable callout script support in pcre2grep.]) +else + enable_pcre2grep_callout_fork="no" +fi + +if test "$enable_unicode" = "yes"; then + AC_DEFINE([SUPPORT_UNICODE], [], [ + Define to any value to enable support for Unicode and UTF encoding. + This will work even in an EBCDIC environment, but it is incompatible + with the EBCDIC macro. 
That is, PCRE2 can support *either* EBCDIC + code *or* ASCII/Unicode, but not both at once.]) +fi + +if test "$enable_pcre2grep_libz" = "yes"; then + AC_DEFINE([SUPPORT_LIBZ], [], [ + Define to any value to allow pcre2grep to be linked with libz, so that it is + able to handle .gz files.]) +fi + +if test "$enable_pcre2grep_libbz2" = "yes"; then + AC_DEFINE([SUPPORT_LIBBZ2], [], [ + Define to any value to allow pcre2grep to be linked with libbz2, so that it + is able to handle .bz2 files.]) +fi + +if test $with_pcre2grep_bufsize -lt 8192 ; then + AC_MSG_WARN([$with_pcre2grep_bufsize is too small for --with-pcre2grep-bufsize; using 8192]) + with_pcre2grep_bufsize="8192" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) + fi +fi + +if test $with_pcre2grep_max_bufsize -lt $with_pcre2grep_bufsize ; then + with_pcre2grep_max_bufsize="$with_pcre2grep_bufsize" +else + if test $? -gt 1 ; then + AC_MSG_ERROR([Bad value for --with-pcre2grep-max-bufsize]) + fi +fi + +AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [ + The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by + pcre2grep to hold parts of the file it is searching. The buffer will be + expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very + long lines. The actual amount of memory used by pcre2grep is three times this + number, because it allows for the buffering of "before" and "after" lines.]) + +AC_DEFINE_UNQUOTED([PCRE2GREP_MAX_BUFSIZE], [$with_pcre2grep_max_bufsize], [ + The value of PCRE2GREP_MAX_BUFSIZE specifies the maximum size of the buffer + used by pcre2grep to hold parts of the file it is searching. 
The actual + amount of memory used by pcre2grep is three times this number, because it + allows for the buffering of "before" and "after" lines.]) + +if test "$enable_pcre2test_libedit" = "yes"; then + AC_DEFINE([SUPPORT_LIBEDIT], [], [ + Define to any value to allow pcre2test to be linked with libedit.]) + LIBREADLINE="$LIBEDIT" +elif test "$enable_pcre2test_libreadline" = "yes"; then + AC_DEFINE([SUPPORT_LIBREADLINE], [], [ + Define to any value to allow pcre2test to be linked with libreadline.]) +fi + +AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [ + The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), + 5 (ANYCRLF), and 6 (NUL).]) + +if test "$enable_bsr_anycrlf" = "yes"; then + AC_DEFINE([BSR_ANYCRLF], [], [ + By default, the \R escape sequence matches any Unicode line ending + character or sequence of characters. If BSR_ANYCRLF is defined (to any + value), this is changed so that backslash-R matches only CR, LF, or CRLF. + The build-time default can be overridden by the user of PCRE2 at runtime.]) +fi + +if test "$enable_never_backslash_C" = "yes"; then + AC_DEFINE([NEVER_BACKSLASH_C], [], [ + Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns.]) +fi + +AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [ + The value of LINK_SIZE determines the number of bytes used to store + links as offsets within the compiled regex. The default is 2, which + allows for compiled patterns up to 65535 code units long. This covers the + vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4 + bytes instead. 
This allows for longer patterns in extreme cases.]) + +AC_DEFINE_UNQUOTED([MAX_VARLOOKBEHIND], [$with_max_varlookbehind], [ + The value of MAX_VARLOOKBEHIND specifies the default maximum length, in + characters, for a variable-length lookbehind assertion.]) + +AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [ + The value of PARENS_NEST_LIMIT specifies the maximum depth of nested + parentheses (of any kind) in a pattern. This limits the amount of system + stack that is used while compiling a pattern.]) + +AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [ + The value of MATCH_LIMIT determines the default number of times the + pcre2_match() function can record a backtrack position during a single + matching attempt. The value is also used to limit a loop counter in + pcre2_dfa_match(). There is a runtime interface for setting a different + limit. The limit exists in order to catch runaway regular expressions that + take forever to determine that they do not match. The default is set very + large so that it does not accidentally catch legitimate cases.]) + +# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth + +if test "$with_match_limit_recursion" != "UNSET"; then +cat <. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Alexandre Oliva . + +case $1 in + '') + echo "$0: No command. Try '$0 --help' for more information." 1>&2 + exit 1; + ;; + -h | --h*) + cat <<\EOF +Usage: depcomp [--help] [--version] PROGRAM [ARGS] + +Run PROGRAMS ARGS to compile a file, generating dependencies +as side-effects. + +Environment variables: + depmode Dependency tracking mode. + source Source file read by 'PROGRAMS ARGS'. + object Object file output by 'PROGRAMS ARGS'. 
+ DEPDIR directory where to store dependencies. + depfile Dependency file to output. + tmpdepfile Temporary file to use when outputting dependencies. + libtool Whether libtool is used (yes/no). + +Report bugs to . +EOF + exit $? + ;; + -v | --v*) + echo "depcomp $scriptversion" + exit $? + ;; +esac + +# Get the directory component of the given path, and save it in the +# global variables '$dir'. Note that this directory component will +# be either empty or ending with a '/' character. This is deliberate. +set_dir_from () +{ + case $1 in + */*) dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`;; + *) dir=;; + esac +} + +# Get the suffix-stripped basename of the given path, and save it the +# global variable '$base'. +set_base_from () +{ + base=`echo "$1" | sed -e 's|^.*/||' -e 's/\.[^.]*$//'` +} + +# If no dependency file was actually created by the compiler invocation, +# we still have to create a dummy depfile, to avoid errors with the +# Makefile "include basename.Plo" scheme. +make_dummy_depfile () +{ + echo "#dummy" > "$depfile" +} + +# Factor out some common post-processing of the generated depfile. +# Requires the auxiliary global variable '$tmpdepfile' to be set. +aix_post_process_depfile () +{ + # If the compiler actually managed to produce a dependency file, + # post-process it. + if test -f "$tmpdepfile"; then + # Each line is of the form 'foo.o: dependency.h'. + # Do two passes, one to just change these to + # $object: dependency.h + # and one to simply output + # dependency.h: + # which is needed to avoid the deleted-header problem. + { sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" + sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" + } > "$depfile" + rm -f "$tmpdepfile" + else + make_dummy_depfile + fi +} + +# A tabulation character. +tab=' ' +# A newline character. +nl=' +' +# Character ranges might be problematic outside the C locale. +# These definitions help. 
+upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ +lower=abcdefghijklmnopqrstuvwxyz +digits=0123456789 +alpha=${upper}${lower} + +if test -z "$depmode" || test -z "$source" || test -z "$object"; then + echo "depcomp: Variables source, object and depmode must be set" 1>&2 + exit 1 +fi + +# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. +depfile=${depfile-`echo "$object" | + sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} +tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} + +rm -f "$tmpdepfile" + +# Avoid interferences from the environment. +gccflag= dashmflag= + +# Some modes work just like other modes, but use different flags. We +# parameterize here, but still list the modes in the big case below, +# to make depend.m4 easier to write. Note that we *cannot* use a case +# here, because this file can only contain one case statement. +if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +cygpath_u="cygpath -u -f -" +if test "$depmode" = msvcmsys; then + # This is just like msvisualcpp but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvisualcpp +fi + +if test "$depmode" = msvc7msys; then + # This is just like msvc7 but w/o cygpath translation. + # Just convert the backslash-escaped backslashes to single forward + # slashes to satisfy depend.m4 + cygpath_u='sed s,\\\\,/,g' + depmode=msvc7 +fi + +if test "$depmode" = xlc; then + # IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information. + gccflag=-qmakedep=gcc,-MF + depmode=gcc +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! 
Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. +## Unfortunately, FreeBSD c89 acceptance of flags depends upon +## the command line argument order; so add the flags where they +## appear in depend2.am. Note that the slowdown incurred here +## affects only configure: in makefiles, %FASTDEP% shortcuts this. + for arg + do + case $arg in + -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; + *) set fnord "$@" "$arg" ;; + esac + shift # fnord + shift # $arg + done + "$@" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## Note that this doesn't just cater to obsolete pre-3.x GCC compilers, +## but also to in-use compilers like IBM xlc/xlC and the HP C compiler. +## (see the conditional assignment to $gccflag above). +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). Also, it might not be +## supported by the other compilers which use the 'gcc' depmode. +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The second -e expression handles DOS-style file names with drive + # letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the "deleted header file" problem.
+## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. +## Some versions of gcc put a space before the ':'. On the theory +## that the space means something, we add a space to the output as +## well. hp depmode also adds that space, but also prefixes the VPATH +## to the object. Take care to not repeat it in the output. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like '#:fec' to the end of the + # dependency line. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \ + | tr "$nl" ' ' >> "$depfile" + echo >> "$depfile" + # The second pass generates a dummy entry for each header file. 
+ tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" + ;; + +xlc) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts '$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.u + tmpdepfile2=$base.u + tmpdepfile3=$dir.libs/$base.u + "$@" -Wc,-M + else + tmpdepfile1=$dir$base.u + tmpdepfile2=$dir$base.u + tmpdepfile3=$dir$base.u + "$@" -M + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + aix_post_process_depfile + ;; + +tcc) + # tcc (Tiny C Compiler) understand '-MD -MF file' since version 0.9.26 + # FIXME: That version still under development at the moment of writing. + # Make that this statement remains true also for stable, released + # versions. + # It will wrap lines (doesn't matter whether long or short) with a + # trailing '\', as in: + # + # foo.o : \ + # foo.c \ + # foo.h \ + # + # It will put a trailing '\' even on the last line, and will use leading + # spaces rather than leading tabs (at least since its commit 0394caf7 + # "Emit spaces for -MD"). + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each non-empty line is of the form 'foo.o : \' or ' dep.h \'. 
+ # We have to change lines of the first kind to '$object: \'. + sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile" + # And for each line of the second kind, we have to emit a 'dep.h:' + # dummy dependency, to avoid the deleted-header problem. + sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile" + rm -f "$tmpdepfile" + ;; + +## The order of this option in the case statement is important, since the +## shell code in configure will try each of these formats in the order +## listed in this file. A plain '-MD' option would be understood by many +## compilers, so we must ensure this comes after the gcc and icc options. +pgcc) + # Portland's C compiler understands '-MD'. + # Will always output deps to 'file.d' where file is the root name of the + # source file under compilation, even if file resides in a subdirectory. + # The object file name does not affect the name of the '.d' file. + # pgcc 10.2 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using '\' : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + set_dir_from "$object" + # Use the source, not the object, to determine the base name, since + # that's sadly what pgcc will do too. + set_base_from "$source" + tmpdepfile=$base.d + + # For projects that build the same source file twice into different object + # files, the pgcc approach of using the *source* file root name can cause + # problems in parallel builds. Use a locking strategy to avoid stomping on + # the same $tmpdepfile. + lockdir=$base.d-lock + trap " + echo '$0: caught signal, cleaning up...' >&2 + rmdir '$lockdir' + exit 1 + " 1 2 13 15 + numtries=100 + i=$numtries + while test $i -gt 0; do + # mkdir is a portable test-and-set. + if mkdir "$lockdir" 2>/dev/null; then + # This process acquired the lock. + "$@" -MD + stat=$? + # Release the lock. + rmdir "$lockdir" + break + else + # If the lock is being held by a different process, wait + # until the winning process is done or we timeout. 
+ while test -d "$lockdir" && test $i -gt 0; do + sleep 1 + i=`expr $i - 1` + done + fi + i=`expr $i - 1` + done + trap - 1 2 13 15 + if test $i -le 0; then + echo "$0: failed to acquire lock after $numtries attempts" >&2 + echo "$0: check lockdir '$lockdir'" >&2 + exit 1 + fi + + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp2) + # The "hp" stanza above does not work with aCC (C++) and HP's ia64 + # compilers, which have integrated preprocessors. The correct option + # to use with these is +Maked; it writes dependencies to a file named + # 'foo.d', which lands next to the object file, wherever that + # happens to be. + # Much of this is similar to the tru64 case; see comments there. + set_dir_from "$object" + set_base_from "$object" + if test "$libtool" = yes; then + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir.libs/$base.d + "$@" -Wc,+Maked + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + "$@" +Maked + fi + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" + do + test -f "$tmpdepfile" && break + done + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile" + # Add 'dependent.h:' lines. 
+ sed -ne '2,${ + s/^ *// + s/ \\*$// + s/$/:/ + p + }' "$tmpdepfile" >> "$depfile" + else + make_dummy_depfile + fi + rm -f "$tmpdepfile" "$tmpdepfile2" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in 'foo.d' instead, so we check for that too. + # Subdirectories are respected. + set_dir_from "$object" + set_base_from "$object" + + if test "$libtool" = yes; then + # Libtool generates 2 separate objects for the 2 libraries. These + # two compilations output dependencies in $dir.libs/$base.o.d and + # in $dir$base.o.d. We have to check for both files, because + # one of the two compilations can be disabled. We should prefer + # $dir$base.o.d over $dir.libs/$base.o.d because the latter is + # automatically cleaned when .libs/ is deleted, while ignoring + # the former would cause a distcleancheck panic. + tmpdepfile1=$dir$base.o.d # libtool 1.5 + tmpdepfile2=$dir.libs/$base.o.d # Likewise. + tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504 + "$@" -Wc,-MD + else + tmpdepfile1=$dir$base.d + tmpdepfile2=$dir$base.d + tmpdepfile3=$dir$base.d + "$@" -MD + fi + + stat=$? + if test $stat -ne 0; then + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + do + test -f "$tmpdepfile" && break + done + # Same post-processing that is required for AIX mode. + aix_post_process_depfile + ;; + +msvc7) + if test "$libtool" = yes; then + showIncludes=-Wc,-showIncludes + else + showIncludes=-showIncludes + fi + "$@" $showIncludes > "$tmpdepfile" + stat=$? 
+ grep -v '^Note: including file: ' "$tmpdepfile" + if test $stat -ne 0; then + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + # The first sed program below extracts the file names and escapes + # backslashes for cygpath. The second sed program outputs the file + # name when reading, but also accumulates all include files in the + # hold buffer in order to output them again at the end. This only + # works with sed implementations that can handle large buffers. + sed < "$tmpdepfile" -n ' +/^Note: including file: *\(.*\)/ { + s//\1/ + s/\\/\\\\/g + p +}' | $cygpath_u | sort -u | sed -n ' +s/ /\\ /g +s/\(.*\)/'"$tab"'\1 \\/p +s/.\(.*\) \\/\1:/ +H +$ { + s/.*/'"$tab"'/ + G + p +}' >> "$depfile" + echo >> "$depfile" # make sure the fragment doesn't end with a backslash + rm -f "$tmpdepfile" + ;; + +msvc7msys) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for ':' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise. 
+ "$@" $dashmflag | + sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this sed invocation + # correctly. Breaking it into two sed invocations is a workaround. + tr ' ' "$nl" < "$tmpdepfile" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no eat=no + for arg + do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + if test $eat = yes; then + eat=no + continue + fi + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -arch) + eat=yes ;; + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix=`echo "$object" | sed 's/^.*\././'` + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + # makedepend may prepend the VPATH from the source file name to the object. + # No need to regex-escape $object, excess matching of '.' is harmless. + sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process the last invocation + # correctly. Breaking it into two sed invocations is a workaround. 
+ sed '1,2d' "$tmpdepfile" \ + | tr ' ' "$nl" \ + | sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \ + | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + # Remove '-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E \ + | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ + | sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test "X$1" != 'X--mode=compile'; do + shift + done + shift + fi + + IFS=" " + for arg + do + case "$arg" in + -o) + shift + ;; + $object) + shift + ;; + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E 2>/dev/null | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile" + echo "$tab" >> "$depfile" + sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvcmsys) + # This case exists only to let depend.m4 do its work. 
It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/3rd/pcre2/doc/html/NON-AUTOTOOLS-BUILD.txt b/3rd/pcre2/doc/html/NON-AUTOTOOLS-BUILD.txt new file mode 100644 index 00000000..bb687f7d --- /dev/null +++ b/3rd/pcre2/doc/html/NON-AUTOTOOLS-BUILD.txt @@ -0,0 +1,442 @@ +Building PCRE2 without using autotools +-------------------------------------- + +This document contains the following sections: + + General + Generic instructions for the PCRE2 C libraries + Stack size in Windows environments + Linking programs in Windows environments + Calling conventions in Windows environments + Comments about Win32 builds + Building PCRE2 on Windows with CMake + Building PCRE2 on Windows with Visual Studio + Testing with RunTest.bat + Building PCRE2 on native z/OS and z/VM + Building PCRE2 under VMS + + +GENERAL + +The source of the PCRE2 libraries consists entirely of code written in Standard +C, and so should compile successfully on any system that has a Standard C +compiler and library. + +The PCRE2 distribution includes a "configure" file for use by the +configure/make (autotools) build system, as found in many Unix-like +environments. The README file contains information about the options for +"configure". + +There is also support for CMake, which some users prefer, especially in Windows +environments, though it can also be run in Unix-like environments. See the +section entitled "Building PCRE2 on Windows with CMake" below. 
+ +Versions of src/config.h and src/pcre2.h are distributed in the PCRE2 tarballs +under the names src/config.h.generic and src/pcre2.h.generic. These are +provided for those who build PCRE2 without using "configure" or CMake. If you +use "configure" or CMake, the .generic versions are not used. + + +GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES + +There are three possible PCRE2 libraries, each handling data with a specific +code unit width: 8, 16, or 32 bits. You can build any combination of them. The +following are generic instructions for building a PCRE2 C library "by hand". If +you are going to use CMake, this section does not apply to you; you can skip +ahead to the CMake section. Note that the settings concerned with 8-bit, +16-bit, and 32-bit code units relate to the type of data string that PCRE2 +processes. They are NOT referring to the underlying operating system bit width. +You do not have to do anything special to compile in a 64-bit environment, for +example. + + (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the + macro settings that it contains to whatever is appropriate for your + environment. In particular, you can alter the definition of the NEWLINE + macro to specify what character(s) you want to be interpreted as line + terminators by default. You need to #define at least one of + SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which + libraries you are going to build. You must set all that apply. + + When you subsequently compile any of the PCRE2 modules, you must specify + -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the + sources. + + An alternative approach is not to edit src/config.h, but to use -D on the + compiler command line to make any changes that you need to the + configuration options. In this case -DHAVE_CONFIG_H must not be set. 
+ + NOTE: There have been occasions when the way in which certain parameters + in src/config.h are used has changed between releases. (In the + configure/make world, this is handled automatically.) When upgrading to a + new release, you are strongly advised to review src/config.h.generic + before re-using what you had previously. + + Note also that the src/config.h.generic file is created from a config.h + that was generated by Autotools, which automatically includes settings of + a number of macros that are not actually used by PCRE2 (for example, + HAVE_DLFCN_H). + + (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h. + + (3) EITHER: + Copy or rename file src/pcre2_chartables.c.dist as + src/pcre2_chartables.c. + + OR: + Compile src/pcre2_dftables.c as a stand-alone program (using + -DHAVE_CONFIG_H if you have set up src/config.h), and then run it with + the single argument "src/pcre2_chartables.c". This generates a set of + standard character tables and writes them to that file. The tables are + generated using the default C locale for your system. If you want to use + a locale that is specified by LC_xxx environment variables, add the -L + option to the pcre2_dftables command. You must use this method if you + are building on a system that uses EBCDIC code. + + The tables in src/pcre2_chartables.c are defaults. The caller of PCRE2 can + specify alternative tables at run time. + + (4) For a library that supports 8-bit code units in the character strings that + it processes, compile the following source files from the src directory, + setting -DPCRE2_CODE_UNIT_WIDTH=8 as a compiler option. Also set + -DHAVE_CONFIG_H if you have set up src/config.h with your configuration, + or else use other -D settings to change the configuration as required. 
+ + pcre2_auto_possess.c + pcre2_chkdint.c + pcre2_chartables.c + pcre2_compile.c + pcre2_compile_class.c + pcre2_config.c + pcre2_context.c + pcre2_convert.c + pcre2_dfa_match.c + pcre2_error.c + pcre2_extuni.c + pcre2_find_bracket.c + pcre2_jit_compile.c + pcre2_maketables.c + pcre2_match.c + pcre2_match_data.c + pcre2_newline.c + pcre2_ord2utf.c + pcre2_pattern_info.c + pcre2_script_run.c + pcre2_serialize.c + pcre2_string_utils.c + pcre2_study.c + pcre2_substitute.c + pcre2_substring.c + pcre2_tables.c + pcre2_ucd.c + pcre2_valid_utf.c + pcre2_xclass.c + + Make sure that you include -I. in the compiler command (or equivalent for + an unusual compiler) so that all included PCRE2 header files are first + sought in the src directory under the current directory. Otherwise you run + the risk of picking up a previously-installed file from somewhere else. + + Note that you must compile pcre2_jit_compile.c, even if you have not + defined SUPPORT_JIT in src/config.h, because when JIT support is not + configured, dummy functions are compiled. When JIT support IS configured, + pcre2_jit_compile.c #includes other files from the sljit dependency, + all of whose names begin with "sljit". It also #includes + src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile + those yourself. + + Note also that the pcre2_fuzzsupport.c file contains special code that is + useful to those who want to run fuzzing tests on the PCRE2 library. Unless + you are doing that, you can ignore it. + + (5) Now link all the compiled code into an object library in whichever form + your system keeps such libraries. This is the PCRE2 C 8-bit library, + typically called something like libpcre2-8. If your system has static and + shared libraries, you may have to do this once for each type. + + (6) If you want to build a library that supports 16-bit or 32-bit code units, + set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4 + above. 
If you want to build more than one PCRE2 library, repeat steps 4 + and 5 as necessary. + + (7) If you want to build the POSIX wrapper functions (which apply only to the + 8-bit library), ensure that you have the src/pcre2posix.h file and then + compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix + library. If targeting a DLL in Windows, make sure to include + -DPCRE2POSIX_SHARED with your compiler flags. + + (8) The pcre2test program can be linked with any combination of the 8-bit, + 16-bit and 32-bit libraries (depending on what you specified in + src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if + necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the + appropriate library/ies. If you compiled an 8-bit library, pcre2test also + needs the pcre2posix wrapper library. + + (9) Run pcre2test on the testinput files in the testdata directory, and check + that the output matches the corresponding testoutput files. There are + comments about what each test does in the section entitled "Testing PCRE2" + in the README file. If you compiled more than one of the 8-bit, 16-bit and + 32-bit libraries, you need to run pcre2test with the -16 option to do + 16-bit tests and with the -32 option to do 32-bit tests. + + Some tests are relevant only when certain build-time options are selected. + For example, test 4 is for Unicode support, and will not run if you have + built PCRE2 without it. See the comments at the start of each testinput + file. If you have a suitable Unix-like shell, the RunTest script will run + the appropriate tests for you. The command "RunTest list" will output a + list of all the tests. + + Note that the supplied files are in Unix format, with just LF characters + as line terminators. You may need to edit them to change this if your + system uses a different convention. + +(10) If you have built PCRE2 with SUPPORT_JIT, the JIT features can be tested + by running pcre2test with the -jit option.
This is done automatically by + the RunTest script. You might also like to build and run the freestanding + JIT test program, src/pcre2_jit_test.c. + +(11) The pcre2test program tests the POSIX wrapper library, but there is also a + freestanding test program in src/pcre2posix_test.c. It must be linked with + both the pcre2posix library and the 8-bit PCRE2 library. + +(12) If you want to use the pcre2grep command, compile and link + src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need + the pcre2posix library). If you have built the PCRE2 library with JIT + support by defining SUPPORT_JIT in src/config.h, you can also define + SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless + it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without + defining SUPPORT_JIT, pcre2grep does not try to make use of JIT. + + +STACK SIZE IN WINDOWS ENVIRONMENTS + +Prior to release 10.30 the default system stack size of 1MiB in some Windows +environments caused issues with some tests. This should no longer be the case +for 10.30 and later releases. + + +LINKING PROGRAMS IN WINDOWS ENVIRONMENTS + +If you want to statically link a program against a PCRE2 library in the form of +a non-dll .a file, you must define PCRE2_STATIC before including src/pcre2.h. + + +CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS + +It is possible to compile programs to use different calling conventions using +MSVC. Search the web for "calling conventions" for more information. To make it +easier to change the calling convention for the exported functions in a +PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external +definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is +not set, it defaults to empty; the default calling convention is then used +(which is what is wanted most of the time). 
+ + +COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE2 ON WINDOWS WITH CMAKE") + +There are two ways of building PCRE2 using the "configure, make, make install" +paradigm on Windows systems: using MinGW or using Cygwin. These are not at all +the same thing; they are completely different from each other. There is also +support for building using CMake, which some users find a more straightforward +way of building PCRE2 under Windows. + +The MinGW home page (http://www.mingw.org/) says this: + + MinGW: A collection of freely available and freely distributable Windows + specific header files and import libraries combined with GNU toolsets that + allow one to produce native Windows programs that do not rely on any + 3rd-party C runtime DLLs. + +The Cygwin home page (http://www.cygwin.com/) says this: + + Cygwin is a Linux-like environment for Windows. It consists of two parts: + + . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing + substantial Linux API functionality + + . A collection of tools which provide Linux look and feel. + +On both MinGW and Cygwin, PCRE2 should build correctly using: + + ./configure && make && make install + +This should create two libraries called libpcre2-8 and libpcre2-posix. These +are independent libraries: when you link with libpcre2-posix you must also link +with libpcre2-8, which contains the basic functions. + +Using Cygwin's compiler generates libraries and executables that depend on +cygwin1.dll. If a library that is generated this way is distributed, +cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL +licence, this forces not only PCRE2 to be under the GPL, but also the entire +application. A distributor who wants to keep their own code proprietary must +purchase an appropriate Cygwin licence. + +MinGW has no such restrictions. The MinGW compiler generates a library or +executable that can run standalone on Windows without any third party dll or +licensing issues. 
+ +But there is more complication: + +If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is +to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a +front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's +gcc and MinGW's gcc). So, a user can: + +. Build native binaries by using MinGW or by getting Cygwin and using + -mno-cygwin. + +. Build binaries that depend on cygwin1.dll by using Cygwin with the normal + compiler flags. + +The test files that are supplied with PCRE2 are in UNIX format, with LF +characters as line terminators. Unless your PCRE2 library uses a default +newline option that includes LF as a valid newline, it may be necessary to +change the line terminators in the test files to get some of the tests to work. + + +BUILDING PCRE2 ON WINDOWS WITH CMAKE + +CMake is an alternative configuration facility that can be used instead of +"configure". CMake creates project files (make files, solution files, etc.) +tailored to numerous development environments, including Visual Studio, +Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no +spaces in the names for your CMake installation and your PCRE2 source and build +directories. + +If you are using CMake and encounter errors, deleting the CMake cache and +restarting from a fresh build may fix the error. In the CMake GUI, the cache can +be deleted by selecting "File > Delete Cache"; or the folder "CMakeCache" can +be deleted. + +1. Install the latest CMake version available from http://www.cmake.org/, and + ensure that cmake\bin is on your path. + +2. Unzip (retaining folder structure) the PCRE2 source tree into a source + directory such as C:\pcre2. You should ensure your local date and time + is not earlier than the file dates in your source dir if the release is + very new. + +3. Create a new, empty build directory, preferably a subdirectory of the + source dir. For example, C:\pcre2\pcre2-xx\build. + +4. Run CMake. 
+ + - Using the CLI, simply run `cmake ..` inside the `build/` directory. You can + use the `ccmake` ncurses GUI to select and configure PCRE2 features. + + - Using the CMake GUI: + + a) Run cmake-gui from the Shell environment of your build tool, for + example, Msys for Msys/MinGW or Visual Studio Command Prompt for + VC/VC++. + + b) Enter C:\pcre2\pcre2-xx and C:\pcre2\pcre2-xx\build for the source and + build directories, respectively. + + c) Press the "Configure" button. + + d) Select the particular IDE / build tool that you are using (Visual + Studio, MSYS makefiles, MinGW makefiles, etc.) + + e) The GUI will then list several configuration options. This is where + you can disable Unicode support or select other PCRE2 optional features. + + f) Press "Configure" again. The adjacent "Generate" button should now be + active. + + g) Press "Generate". + +5. The build directory should now contain a usable build system, be it a + solution file for Visual Studio, makefiles for MinGW, etc. Exit from + cmake-gui and use the generated build system with your compiler or IDE. + E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE2 + solution, select the desired configuration (Debug, or Release, etc.) and + build the ALL_BUILD project. + + Regardless of build system used, `cmake --build .` will build it. + +6. If during configuration with cmake-gui you've elected to build the test + programs, you can execute them by building the test project. E.g., for + MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The + most recent build configuration is targeted by the tests. A summary of + test results is presented. Complete test output is subsequently + available for review in Testing\Temporary under your build dir. + + Regardless of build system used, `ctest` will run the tests. 
+ + +BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO + +The code currently cannot be compiled without an inttypes.h header, which is +available only with Visual Studio 2013 or newer. However, this portable and +permissively-licensed implementation of the stdint.h header could be used as an +alternative: + + http://www.azillionmonkeys.com/qed/pstdint.h + +Just rename it and drop it into the top level of the build tree. + + +TESTING WITH RUNTEST.BAT + +If configured with CMake, building the test project ("make test" or building +ALL_TESTS in Visual Studio) creates (and runs) pcre2_test.bat (and depending +on your configuration options, possibly other test programs) in the build +directory. The pcre2_test.bat script runs RunTest.bat with correct source and +exe paths. + +For manual testing with RunTest.bat, provided the build dir is a subdirectory +of the source directory: Open a command shell window. Chdir to the location +of your pcre2test.exe and pcre2grep.exe programs. Call RunTest.bat with +"..\RunTest.bat" or "..\..\RunTest.bat" as appropriate. + +To run only a particular test with RunTest.bat, provide a test number argument. + +Otherwise: + +1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe + have been created. + +2. Edit RunTest.bat to identify the full or relative location of + the pcre2 source (in which the testdata folder resides), e.g.: + + set srcdir=C:\pcre2\pcre2-10.00 + +3. In a Windows command environment, chdir to the location of your bat and + exe programs. + +4. Run RunTest.bat. Test outputs will automatically be compared to expected + results, and discrepancies will be identified in the console output. + +To independently test the just-in-time compiler, run pcre2_jit_test.exe. + + +BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM + +z/OS and z/VM are operating systems for mainframe computers, produced by IBM. +The character code used is EBCDIC, not ASCII or Unicode.
In z/OS, UNIX APIs and +applications can be supported through UNIX System Services, and in such an +environment it should be possible to build PCRE2 in the same way as in other +systems, with the EBCDIC related configuration settings, but it is not known if +anybody has tried this. + +In native z/OS (without UNIX System Services) and in z/VM, special ports are +required. For details, please see file 939 on this web site: + + http://www.cbttape.org + +Everything in that location, source and executable, is in EBCDIC and native +z/OS file formats. The port provides an API for LE languages such as COBOL and +for the z/OS and z/VM versions of the Rexx languages. + + +BUILDING PCRE2 UNDER VMS + +Alexey Chupahin has contributed some auxiliary files for building PCRE2 under +OpenVMS. They are in the "vms" directory in the distribution tarball. Please +read the file called vms/openvms_readme.txt. The pcre2test and pcre2grep +programs contain some VMS-specific code. + +============================== +Last updated: 26 December 2024 +============================== + diff --git a/3rd/pcre2/doc/html/README.txt b/3rd/pcre2/doc/html/README.txt new file mode 100644 index 00000000..5a50f7f1 --- /dev/null +++ b/3rd/pcre2/doc/html/README.txt @@ -0,0 +1,970 @@ +README file for PCRE2 (Perl-compatible regular expression library) +------------------------------------------------------------------ + +PCRE2 is a re-working of the original PCRE1 library to provide an entirely new +API. Since its initial release in 2015, there has been further development of +the code and it now differs from PCRE1 in more than just the API. There are new +features, and the internals have been improved. The original PCRE1 library is +now obsolete and no longer maintained.
The latest release of PCRE2 is available +in .tar.gz, .tar.bz2, or .zip form from this GitHub repository: + +https://github.com/PCRE2Project/pcre2/releases + +There is a mailing list for discussion about the development of PCRE2 at +pcre2-dev@googlegroups.com. You can subscribe by sending an email to +pcre2-dev+subscribe@googlegroups.com. + +You can access the archives and also subscribe or manage your subscription +here: + +https://groups.google.com/g/pcre2-dev + +Please read the NEWS file if you are upgrading from a previous release. The +contents of this README file are: + + The PCRE2 APIs + Documentation for PCRE2 + Building PCRE2 on non-Unix-like systems + Building PCRE2 without using autotools + Building PCRE2 using autotools + Retrieving configuration information + Shared libraries + Cross-compiling using autotools + Making new tarballs + Testing PCRE2 + Character tables + File manifest + + +The PCRE2 APIs +-------------- + +PCRE2 is written in C, and it has its own API. There are three sets of +functions, one for the 8-bit library, which processes strings of bytes, one for +the 16-bit library, which processes strings of 16-bit values, and one for the +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. + +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. + +The header file for the POSIX-style functions is called pcre2posix.h. The +official POSIX name is regex.h, but I did not want to risk possible problems +with existing files of that name by distributing it that way.
To use PCRE2 with +an existing program that uses the POSIX API, pcre2posix.h will have to be +renamed or pointed at by a link (or the program modified, of course). See the +pcre2posix documentation for more details. + + +Documentation for PCRE2 +----------------------- + +If you install PCRE2 in the normal way on a Unix-like system, you will end up +with a set of man pages whose names all start with "pcre2". The one that is +just called "pcre2" lists all the others. In addition to these man pages, the +PCRE2 documentation is supplied in two other forms: + + 1. There are files called doc/pcre2.txt, doc/pcre2grep.txt, and + doc/pcre2test.txt in the source distribution. The first of these is a + concatenation of the text forms of all the section 3 man pages except the + listing of pcre2demo.c and those that summarize individual functions. The + other two are the text forms of the section 1 man pages for the pcre2grep + and pcre2test commands. These text forms are provided for ease of scanning + with text editors or similar tools. They are installed in + <prefix>/share/doc/pcre2, where <prefix> is the installation prefix + (defaulting to /usr/local). + + 2. A set of files containing all the documentation in HTML form, hyperlinked + in various ways, and rooted in a file called index.html, is distributed in + doc/html and installed in <prefix>/share/doc/pcre2/html. + + +Building PCRE2 on non-Unix-like systems +--------------------------------------- + +For a non-Unix-like system, please read the file NON-AUTOTOOLS-BUILD, though if +your system supports the use of "configure" and "make" you may be able to build +PCRE2 using autotools in the same way as for many Unix-like systems. + +PCRE2 can also be configured using CMake, which can be run in various ways +(command line, GUI, etc). This creates Makefiles, solution files, etc. The file +NON-AUTOTOOLS-BUILD has information about CMake. + +PCRE2 has been compiled on many different operating systems.
It should be +straightforward to build PCRE2 on any system that has a Standard C compiler and +library, because it uses only Standard C functions. + + +Building PCRE2 without using autotools +-------------------------------------- + +The use of autotools (in particular, libtool) is problematic in some +environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD +file for ways of building PCRE2 without using autotools. + + +Building PCRE2 using autotools +------------------------------ + +The following instructions assume the use of the widely used "configure; make; +make install" (autotools) process. + +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set +to the directory where you want the files to be created. This command is a +standard GNU "autoconf" configuration script, for which generic instructions +are supplied in the file INSTALL. + +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + +Most commonly, people build PCRE2 within its own distribution directory, and in +this case, on many systems, just running "./configure" is sufficient. However, +the usual methods of changing standard defaults are available. For example: + +CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local + +This command specifies that the C compiler should be run with the flags '-O2 +-Wall' instead of the default, and that "make install" should install PCRE2 +under /opt/local instead of the default /usr/local. + +If you want to build in a different directory, just run "configure" with that +directory as current. 
For example, suppose you have unpacked the PCRE2 source +into /source/pcre2/pcre2-xxx, but you want to build it in +/build/pcre2/pcre2-xxx: + +cd /build/pcre2/pcre2-xxx +/source/pcre2/pcre2-xxx/configure + +PCRE2 is written in C and is normally compiled as a C library. However, it is +possible to build it as a C++ library, though the provided building apparatus +does not have any features to support this. + +There are some optional features that can be included or omitted from the PCRE2 +library. They are also documented in the pcre2build man page. + +. By default, both shared and static libraries are built. You can change this + by adding one of these options to the "configure" command: + + --disable-shared + --disable-static + + Setting --disable-shared ensures that PCRE2 libraries are built as static + libraries. The binaries that are then created as part of the build process + (for example, pcre2test and pcre2grep) are linked statically with one or more + PCRE2 libraries, but may also be dynamically linked with other libraries such + as libc. If you want these binaries to be fully statically linked, you can + set LDFLAGS like this: + + LDFLAGS=--static ./configure --disable-shared + + Note the two hyphens in --static. Of course, this works only if static + versions of all the relevant libraries are available for linking. See also + "Shared libraries" below. + +. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to + the "configure" command, the 16-bit library is also built. If you add + --enable-pcre2-32 to the "configure" command, the 32-bit library is also + built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 + to disable building the 8-bit library. + +. If you want to include support for just-in-time (JIT) compiling, which can + give large performance improvements on certain platforms, add --enable-jit to + the "configure" command. This support is available only for certain hardware + architectures. 
If you try to enable it on an unsupported architecture, there + will be a compile time error. If in doubt, use --enable-jit=auto, which + enables JIT only if the current hardware is supported. + +. If you are enabling JIT under an SELinux environment you may also want to add + --enable-jit-sealloc, which enables the use of an executable memory allocator + that is compatible with SELinux. Warning: this allocator is experimental! + It does not support fork() operation and may crash when no disk space is + available. This option has no effect if JIT is disabled. + +. If you do not want to make use of the default support for UTF-8 Unicode + character strings in the 8-bit library, UTF-16 Unicode character strings in + the 16-bit library, or UTF-32 Unicode character strings in the 32-bit + library, you can add --disable-unicode to the "configure" command. This + reduces the size of the libraries. It is not possible to configure one + library with Unicode support, and another without, in the same configuration. + It is also not possible to use --enable-ebcdic (see below) with Unicode + support, so if this option is set, you must also use --disable-unicode. + + When Unicode support is available, the use of a UTF encoding still has to be + enabled by setting the PCRE2_UTF option at run time or starting a pattern + with (*UTF). When PCRE2 is compiled with Unicode support, its input can only + either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. + + As well as supporting UTF strings, Unicode support includes support for the + \P, \p, and \X sequences that recognize Unicode character properties. + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). + +.
You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any + of the preceding, or any of the Unicode newline sequences, or the NUL (zero) + character as indicating the end of a line. Whatever you specify at build time + is the default; the caller of PCRE2 can change the selection at run time. The + default newline indicator is a single LF character (the Unix standard). You + can specify the default newline indicator by adding --enable-newline-is-cr, + --enable-newline-is-lf, --enable-newline-is-crlf, + --enable-newline-is-anycrlf, --enable-newline-is-any, or + --enable-newline-is-nul to the "configure" command, respectively. + +. By default, the sequence \R in a pattern matches any Unicode line ending + sequence. This is independent of the option specifying what PCRE2 considers + to be the end of a line (see above). However, the caller of PCRE2 can + restrict \R to match only CR, LF, or CRLF. You can make this the default by + adding --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R"). + +. In a pattern, the escape sequence \C matches a single code unit, even in a + UTF mode. This can be dangerous because it breaks up multi-code-unit + characters. You can build PCRE2 with the use of \C permanently locked out by + adding --enable-never-backslash-C (note the upper case C) to the "configure" + command. When \C is allowed by the library, individual applications can lock + it out by calling pcre2_compile() with the PCRE2_NEVER_BACKSLASH_C option. + +. PCRE2 has a counter that limits the depth of nesting of parentheses in a + pattern. This limits the amount of system stack that a pattern uses when it + is compiled. The default is 250, but you can change it by setting, for + example, + + --with-parens-nest-limit=500 + +. PCRE2 has a counter that can be set to limit the amount of computing resource + it uses when matching a pattern. If the limit is exceeded during a match, the + match fails. The default is ten million. 
You can change the default by + setting, for example, + + --with-match-limit=500000 + + on the "configure" command. This is just the default; individual calls to + pcre2_match() or pcre2_dfa_match() can supply their own value. There is more + discussion in the pcre2api man page (search for pcre2_set_match_limit). + +. There is a separate counter that limits the depth of nested backtracking + (pcre2_match()) or nested function calls (pcre2_dfa_match()) during a + matching process, which indirectly limits the amount of heap memory that is + used, and in the case of pcre2_dfa_match() the amount of stack as well. This + counter also has a default of ten million, which is essentially "unlimited". + You can change the default by setting, for example, + + --with-match-limit-depth=5000 + + There is more discussion in the pcre2api man page (search for + pcre2_set_depth_limit). + +. You can also set an explicit limit on the amount of heap memory used by + the pcre2_match() and pcre2_dfa_match() interpreters: + + --with-heap-limit=500 + + The units are kibibytes (units of 1024 bytes). This limit does not apply when + the JIT optimization (which has its own memory control features) is used. + There is more discussion on the pcre2api man page (search for + pcre2_set_heap_limit). + +. In the 8-bit library, the default maximum compiled pattern size is around + 64 kibibytes. You can increase this by adding --with-link-size=3 to the + "configure" command. PCRE2 then uses three bytes instead of two for offsets + to different parts of the compiled pattern. In the 16-bit library, + --with-link-size=3 is the same as --with-link-size=4, which (in both + libraries) uses four-byte offsets. Increasing the internal link size reduces + performance in the 8-bit and 16-bit libraries. In the 32-bit library, the + link size setting is ignored, as 4-byte offsets are always used. + +. 
Lookbehind assertions in which one or more branches can match a variable + number of characters are supported only if there is a maximum matching length + for each top-level branch. There is a limit to this maximum that defaults to + 255 characters. You can alter this default by a setting such as + + --with-max-varlookbehind=100 + + The limit can be changed at runtime by calling pcre2_set_max_varlookbehind(). + Lookbehind assertions in which every branch matches a fixed number of + characters (not necessarily all the same) are not constrained by this limit. + +. For speed, PCRE2 uses four tables for manipulating and identifying characters + whose code point values are less than 256. By default, it uses a set of + tables for ASCII encoding that is part of the distribution. If you specify + + --enable-rebuild-chartables + + a program called pcre2_dftables is compiled and run in the default C locale + when you obey "make". It builds a source file called pcre2_chartables.c. If + you do not specify this option, pcre2_chartables.c is created as a copy of + pcre2_chartables.c.dist. See "Character tables" below for further + information. + +. It is possible to compile PCRE2 for use on systems that use EBCDIC as their + character code (as opposed to ASCII/Unicode) by specifying + + --enable-ebcdic --disable-unicode + + This automatically implies --enable-rebuild-chartables (see above). However, + when PCRE2 is built this way, it always operates in EBCDIC. It cannot support + both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25, + which specifies that the code value for the EBCDIC NL character is 0x25 + instead of the default 0x15. + +. If you specify --enable-debug, additional debugging code is included in the + build. This option is intended for use by the PCRE2 maintainers. + +. In environments where valgrind is installed, if you specify + + --enable-valgrind + + PCRE2 will use valgrind annotations to mark certain memory regions as + unaddressable. 
This allows it to detect invalid memory accesses, and is + mostly useful for debugging PCRE2 itself. + +. In environments where the gcc compiler is used and lcov is installed, if you + specify + + --enable-coverage + + the build process implements a code coverage report for the test suite. The + report is generated by running "make coverage". If ccache is installed on + your system, it must be disabled when building PCRE2 for coverage reporting. + You can do this by setting the environment variable CCACHE_DISABLE=1 before + running "make" to build PCRE2. There is more information about coverage + reporting in the "pcre2build" documentation. + +. When JIT support is enabled, pcre2grep automatically makes use of it, unless + you add --disable-pcre2grep-jit to the "configure" command. + +. There is support for calling external programs during matching in the + pcre2grep command, using PCRE2's callout facility with string arguments. This + support can be disabled by adding --disable-pcre2grep-callout to the + "configure" command. There are two kinds of callout: one that generates + output from inbuilt code, and another that calls an external program. The + latter has special support for Windows and VMS; otherwise it assumes the + existence of the fork() function. This facility can be disabled by adding + --disable-pcre2grep-callout-fork to the "configure" command. + +. The pcre2grep program currently supports only 8-bit data files, and so + requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use + libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by + specifying one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + Of course, the relevant libraries must be installed on your system. + +. The default starting size (in bytes) of the internal buffer used by pcre2grep + can be set by, for example: + + --with-pcre2grep-bufsize=51200 + + The value must be a plain integer. The default is 20480. 
The amount of memory + used by pcre2grep is actually three times this number, to allow for "before" + and "after" lines. If very long lines are encountered, the buffer is + automatically enlarged, up to a fixed maximum size. + +. The default maximum size of pcre2grep's internal buffer can be set by, for + example: + + --with-pcre2grep-max-bufsize=2097152 + + The default is either 1048576 or the value of --with-pcre2grep-bufsize, + whichever is the larger. + +. It is possible to compile pcre2test so that it links with the libreadline + or libedit libraries, by specifying, respectively, + + --enable-pcre2test-libreadline or --enable-pcre2test-libedit + + If this is done, when pcre2test's input is from a terminal, it reads it using + the readline() function. This provides line-editing and history facilities. + Note that libreadline is GPL-licensed, so if you distribute a binary of + pcre2test linked in this way, there may be licensing issues. These can be + avoided by linking with libedit (which has a BSD licence) instead. + + Enabling libreadline causes the -lreadline option to be added to the + pcre2test build. In many operating environments with a system-installed + readline library this is sufficient. However, in some environments (e.g. if + an unmodified distribution version of readline is in use), it may be + necessary to specify something like LIBS="-lncurses" as well. This is + because, to quote the readline INSTALL, "Readline uses the termcap functions, + but does not link with the termcap or curses library itself, allowing + applications which link with readline the option to choose an appropriate + library." If you get error messages about missing functions tgetstr, tgetent, + tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses + library should fix it. + +. The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. 
By default, PCRE2 uses these modifiers in + environments other than Microsoft Visual Studio versions earlier than 2013 + when __STDC_VERSION__ is defined and has a value greater than or equal to + 199901L (indicating C99). However, there is at least one environment that + claims to be C99 but does not support these modifiers. If + --disable-percent-zt is specified, no use is made of the z or t modifiers. + Instead of %td or %zu, %lu is used, with a cast for size_t values. + +. There is a special option called --enable-fuzz-support for use by people who + want to run fuzzing tests on PCRE2. If set, it causes an extra library + called libpcre2-fuzzsupport.a to be built, but not installed. This contains + a single function called LLVMFuzzerTestOneInput() whose arguments are a + pointer to a string and the length of the string. When called, this function + tries to compile the string as a pattern, and if that succeeds, to match + it. This is done both with no options and with some random options bits that + are generated from the string. Setting --enable-fuzz-support also causes an + executable called pcre2fuzzcheck-{8,16,32} to be created. This is normally + run under valgrind or used when PCRE2 is compiled with address sanitizing + enabled. It calls the fuzzing function and outputs information about what it + is doing. The input strings are specified by arguments: if an argument + starts with "=" the rest of it is a literal input string. Otherwise, it is + assumed to be a file name, and the contents of the file are the test string. + +. Releases before 10.30 could be compiled with --disable-stack-for-recursion, + which caused pcre2_match() to use individual blocks on the heap for + backtracking instead of recursive function calls (which use the stack). This + is now obsolete because pcre2_match() was refactored always to use the heap + (in a much more efficient way than before). 
This option is retained for + backwards compatibility, but has no effect other than to output a warning. + +The "configure" script builds the following files for the basic C library: + +. Makefile the makefile that builds the library +. src/config.h build-time configuration options for the library +. src/pcre2.h the public PCRE2 header file +. pcre2-config script that shows the building settings such as CFLAGS + that were set for "configure" +. libpcre2-8.pc ) +. libpcre2-16.pc ) data for the pkg-config command +. libpcre2-32.pc ) +. libpcre2-posix.pc ) +. libtool script that builds shared and/or static libraries + +Versions of config.h and pcre2.h are distributed in the src directory of PCRE2 +tarballs under the names config.h.generic and pcre2.h.generic. These are +provided for those who have to build PCRE2 without using "configure" or CMake. +If you use "configure" or CMake, the .generic versions are not used. + +The "configure" script also creates config.status, which is an executable +script that can be run to recreate the configuration, and config.log, which +contains compiler output from tests that "configure" runs. + +Once "configure" has run, you can run "make". This builds whichever of the +libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test +program called pcre2test. If you enabled JIT support with --enable-jit, another +test program called pcre2_jit_test is built as well. If the 8-bit library is +built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also +built. Running "make" with the -j option may speed up compilation on +multiprocessor systems. + +The command "make check" runs all the appropriate tests. Details of the PCRE2 +tests are given below in a separate section of this document. The -j option of +"make" can also be used when running the tests. + +You can use "make install" to install PCRE2 into live directories on your +system. 
The following are installed (file names are all relative to the +<prefix> that is set when "configure" is run): + + Commands (bin): + pcre2test + pcre2grep (if 8-bit support is enabled) + pcre2-config + + Libraries (lib): + libpcre2-8 (if 8-bit support is enabled) + libpcre2-16 (if 16-bit support is enabled) + libpcre2-32 (if 32-bit support is enabled) + libpcre2-posix (if 8-bit support is enabled) + + Configuration information (lib/pkgconfig): + libpcre2-8.pc + libpcre2-16.pc + libpcre2-32.pc + libpcre2-posix.pc + + Header files (include): + pcre2.h + pcre2posix.h + + Man pages (share/man/man{1,3}): + pcre2grep.1 + pcre2test.1 + pcre2-config.1 + pcre2.3 + pcre2*.3 (lots more pages, all starting "pcre2") + + HTML documentation (share/doc/pcre2/html): + index.html + *.html (lots more pages, hyperlinked from index.html) + + Text file documentation (share/doc/pcre2): + AUTHORS + COPYING + ChangeLog + LICENCE + NEWS + README + SECURITY + pcre2.txt (a concatenation of the man(3) pages) + pcre2test.txt the pcre2test man page + pcre2grep.txt the pcre2grep man page + pcre2-config.txt the pcre2-config man page + +If you want to remove PCRE2 from your system, you can run "make uninstall". +This removes all the files that "make install" installed. However, it does not +remove any directories, because these are often shared with other programs. + + +Retrieving configuration information +------------------------------------ + +Running "make install" installs the command pcre2-config, which can be used to +recall information about the PCRE2 configuration and installation. For example: + + pcre2-config --version + +prints the version number, and + + pcre2-config --libs8 + +outputs information about where the 8-bit library is installed. This command +can be included in makefiles for programs that use PCRE2, saving the programmer +from having to remember too many details. Run pcre2-config with no arguments to +obtain a list of possible arguments.
+ +The pkg-config command is another system for saving and retrieving information +about installed libraries. Instead of separate commands for each library, a +single command is used. For example: + + pkg-config --libs libpcre2-16 + +The data is held in *.pc files that are installed in a directory called +<prefix>/lib/pkgconfig. + + +Shared libraries +---------------- + +The default distribution builds PCRE2 as shared libraries and static libraries, +as long as the operating system supports shared libraries. Shared library +support relies on the "libtool" script which is built as part of the +"configure" process. + +The libtool script is used to compile and link both shared and static +libraries. They are placed in a subdirectory called .libs when they are newly +built. The programs pcre2test and pcre2grep are built to use these uninstalled +libraries (by means of wrapper scripts in the case of shared libraries). When +you use "make install" to install shared libraries, pcre2grep and pcre2test are +automatically re-built to use the newly installed shared libraries before being +installed themselves. However, the versions left in the build directory still +use the uninstalled libraries. + +To build PCRE2 using static libraries only you must use --disable-shared when +configuring it. For example: + +./configure --prefix=/usr/gnu --disable-shared + +Then run "make" in the usual way. Similarly, you can use --disable-static to +build only shared libraries. Note, however, that when you build only static +libraries, binary programs such as pcre2test and pcre2grep may still be +dynamically linked with other libraries (for example, libc) unless you set +LDFLAGS to --static when running "configure". + + +Cross-compiling using autotools +------------------------------- + +You can specify CC and CFLAGS in the normal way to the "configure" command, in +order to cross-compile PCRE2 for some other host.
However, you should NOT +specify --enable-rebuild-chartables, because if you do, the pcre2_dftables.c +source file is compiled and run on the local host, in order to generate the +inbuilt character tables (the pcre2_chartables.c file). This will probably not +work, because pcre2_dftables.c needs to be compiled with the local compiler, +not the cross compiler. + +When --enable-rebuild-chartables is not specified, pcre2_chartables.c is +created by making a copy of pcre2_chartables.c.dist, which is a default set of +tables that assumes ASCII code. Cross-compiling with the default tables should +not be a problem. + +If you need to modify the character tables when cross-compiling, you should +move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by +hand and run it on the local host to make a new version of +pcre2_chartables.c.dist. See the pcre2build section "Creating character tables +at build time" for more details. + + +Making new tarballs +------------------- + +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. + +If you have modified any of the man page sources in the doc directory, you +should first run the maint/PrepareRelease script before making a distribution. +This script creates the .txt and HTML forms of the documentation from the man +pages. + + +Testing PCRE2 +------------- + +To test the basic PCRE2 library on a Unix-like system, run the RunTest script. +There is another script called RunGrepTest that tests the pcre2grep command. +When the 8-bit library is built, a test program for the POSIX wrapper, called +pcre2posix_test, is compiled, and when JIT support is enabled, a test program +called pcre2_jit_test is built. The scripts and the program tests are all run +when you obey "make check". For other environments, see the instructions in +NON-AUTOTOOLS-BUILD. 
+ +The RunTest script runs the pcre2test test program (which is documented in its +own man page) on each of the relevant testinput files in the testdata +directory, and compares the output with the contents of the corresponding +testoutput files. RunTest uses a file called testtry to hold the main output +from pcre2test. Other files whose names begin with "test" are used as working +files in some tests. + +Some tests are relevant only when certain build-time options were selected. For +example, the tests for UTF-8/16/32 features are run only when Unicode support +is available. RunTest outputs a comment when it skips a test. + +Many (but not all) of the tests that are not skipped are run twice if JIT +support is available. On the second run, JIT compilation is forced. This +testing can be suppressed by putting "-nojit" on the RunTest command line. + +The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit +libraries that are enabled. If you want to run just one set of tests, call +RunTest with either the -8, -16 or -32 option. + +If valgrind is installed, you can run the tests under it by putting "-valgrind" +on the RunTest command line. To run pcre2test on just one or more specific test +files, give their numbers as arguments to RunTest, for example: + + RunTest 2 7 11 + +You can also specify ranges of tests such as 3-6 or 3- (meaning 3 to the +end), or a number preceded by ~ to exclude a test. For example: + + RunTest 3-15 ~10 + +This runs tests 3 to 15, excluding test 10, and just ~13 runs all the tests +except test 13. Whatever order the arguments are in, the tests are always run +in numerical order. + +You can also call RunTest with the single argument "list" to cause it to output +a list of tests. + +The test sequence starts with "test 0", which is a special test that has no +input file, and whose output is not checked. This is because it will be +different on different hardware and with different configurations.
The test +exists in order to exercise some of pcre2test's code that would not otherwise +be run. + +Tests 1 and 2 can always be run, as they expect only plain text strings (not +UTF) and make no use of Unicode properties. The first test file can be fed +directly into the perltest.sh script to check that Perl gives the same results. +The only difference you should see is in the first few lines, where the Perl +version is given instead of the PCRE2 version. The second set of tests check +auxiliary functions, error detection, and run-time flags that are specific to +PCRE2. It also uses the debugging flags to check some of the internals of +pcre2_compile(). + +If you build PCRE2 with a locale setting that is not the standard C locale, the +character tables may be different (see next paragraph). In some cases, this may +cause failures in the second set of tests. For example, in a locale where the +isprint() function yields TRUE for characters in the range 128-255, the use of +[:isascii:] inside a character class defines a different set of characters, and +this shows up in this test as a difference in the compiled code, which is being +listed for checking. For example, where the comparison test output contains +[\x00-\x7f] the test might contain [\x00-\xff], and similarly in some other +cases. This is not a bug in PCRE2. + +Test 3 checks pcre2_maketables(), the facility for building a set of character +tables for a specific locale and using them instead of the default tables. The +script uses the "locale" command to check for the availability of the "fr_FR", +"french", or "fr" locale, and uses the first one that it finds. If the "locale" +command fails, or if its output doesn't include "fr_FR", "french", or "fr" in +the list of available locales, the third test cannot be run, and a comment is +output to say why. 
If running this test produces an error like this: + + ** Failed to set locale "fr_FR" + +it means that the given locale is not available on your system, despite being +listed by "locale". This does not mean that PCRE2 is broken. There are three +alternative output files for the third test, because three different versions +of the French locale have been encountered. The test passes if its output +matches any one of them. + +Tests 4 and 5 check UTF and Unicode property support, test 4 being compatible +with the perltest.sh script, and test 5 checking PCRE2-specific things. + +Tests 6 and 7 check the pcre2_dfa_match() alternative matching function, in +non-UTF mode and UTF-mode with Unicode property support, respectively. + +Test 8 checks some internal offsets and code size features, but it is run only +when Unicode support is enabled. The output is different in 8-bit, 16-bit, and +32-bit modes and for different link sizes, so there are different output files +for each mode and link size. + +Tests 9 and 10 are run only in 8-bit mode, and tests 11 and 12 are run only in +16-bit and 32-bit modes. These are tests that generate different output in +8-bit mode. Each pair are for general cases and Unicode support, respectively. + +Test 13 checks the handling of non-UTF characters greater than 255 by +pcre2_dfa_match() in 16-bit and 32-bit modes. + +Test 14 contains some special UTF and UCP tests that give different output for +different code unit widths. + +Test 15 contains a number of tests that must not be run with JIT. They check, +among other non-JIT things, the match-limiting features of the interpretive +matcher. + +Test 16 is run only when JIT support is not available. It checks that an +attempt to use JIT has the expected behaviour. + +Test 17 is run only when JIT support is available. It checks JIT complete and +partial modes, match-limiting under JIT, and other JIT-specific features. + +Tests 18 and 19 are run only in 8-bit mode. 
They check the POSIX interface to +the 8-bit library, without and with Unicode support, respectively. + +Test 20 checks the serialization functions by writing a set of compiled +patterns to a file, and then reloading and checking them. + +Tests 21 and 22 test \C support when the use of \C is not locked out, without +and with UTF support, respectively. Test 23 tests \C when it is locked out. + +Tests 24 and 25 test the experimental pattern conversion functions, without and +with UTF support, respectively. + +Test 26 checks Unicode property support using tests that are generated +automatically from the Unicode data tables. + + +Character tables +---------------- + +For speed, PCRE2 uses four tables for manipulating and identifying characters +whose code point values are less than 256. By default, a set of tables that is +built into the library is used. The pcre2_maketables() function can be called +by an application to create a new set of tables in the current locale. These are +passed to PCRE2 by calling pcre2_set_character_tables() to put a pointer into a +compile context. + +The source file called pcre2_chartables.c contains the default set of tables. +By default, this is created as a copy of pcre2_chartables.c.dist, which +contains tables for ASCII coding. However, if --enable-rebuild-chartables is +specified for ./configure, a new version of pcre2_chartables.c is built by the +program pcre2_dftables (compiled from pcre2_dftables.c), which uses the ANSI C +character handling functions such as isalnum(), isalpha(), isupper(), +islower(), etc. to build the table sources. This means that the default C +locale that is set for your system will control the contents of these default +tables. You can change the default tables by editing pcre2_chartables.c and +then re-building PCRE2. If you do this, you should take care to ensure that the +file does not get automatically re-generated. 
The best way to do this is to +move pcre2_chartables.c.dist out of the way and replace it with your customized +tables. + +When the pcre2_dftables program is run as a result of specifying +--enable-rebuild-chartables, it uses the default C locale that is set on your +system. It does not pay attention to the LC_xxx environment variables. In other +words, it uses the system's default locale rather than whatever the compiling +user happens to have set. If you really do want to build a source set of +character tables in a locale that is specified by the LC_xxx variables, you can +run the pcre2_dftables program by hand with the -L option. For example: + + ./pcre2_dftables -L pcre2_chartables.c.special + +The second argument names the file where the source code for the tables is +written. The first two 256-byte tables provide lower casing and case flipping +functions, respectively. The next table consists of a number of 32-byte bit +maps which identify certain character classes such as digits, "word" +characters, white space, etc. These are used when building 32-byte bit maps +that represent character classes for code points less than 256. The final +256-byte table has bits indicating various character types, as follows: + + 1 white space character + 2 letter + 4 lower case letter + 8 decimal digit + 16 alphanumeric or '_' + +You can also specify -b (with or without -L) when running pcre2_dftables. This +causes the tables to be written in binary instead of as source code. A set of +binary tables can be loaded into memory by an application and passed to +pcre2_compile() in the same way as tables created dynamically by calling +pcre2_maketables(). The tables are just a string of bytes, independent of +hardware characteristics such as endianness. This means they can be bundled +with an application that runs in different environments, to ensure consistent +behaviour. + +See also the pcre2build section "Creating character tables at build time". 
+ + +File manifest +------------- + +The distribution should contain the files listed below. + +(A) Source files for the PCRE2 library functions and their headers are found in + the src directory: + + src/pcre2_dftables.c auxiliary program for building pcre2_chartables.c + when --enable-rebuild-chartables is specified + + src/pcre2_chartables.c.dist a default set of character tables that assume + ASCII coding; unless --enable-rebuild-chartables is + specified, used by copying to pcre2_chartables.c + + src/pcre2posix.c ) + src/pcre2_auto_possess.c ) + src/pcre2_chkdint.c ) + src/pcre2_compile.c ) + src/pcre2_compile_class.c ) + src/pcre2_config.c ) + src/pcre2_context.c ) + src/pcre2_convert.c ) + src/pcre2_dfa_match.c ) + src/pcre2_error.c ) + src/pcre2_extuni.c ) + src/pcre2_find_bracket.c ) + src/pcre2_jit_compile.c ) + src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_misc.c ) and some internal functions that they use + src/pcre2_maketables.c ) + src/pcre2_match.c ) + src/pcre2_match_data.c ) + src/pcre2_newline.c ) + src/pcre2_ord2utf.c ) + src/pcre2_pattern_info.c ) + src/pcre2_script_run.c ) + src/pcre2_serialize.c ) + src/pcre2_string_utils.c ) + src/pcre2_study.c ) + src/pcre2_substitute.c ) + src/pcre2_substring.c ) + src/pcre2_tables.c ) + src/pcre2_ucd.c ) + src/pcre2_ucptables.c ) + src/pcre2_valid_utf.c ) + src/pcre2_xclass.c ) + + src/pcre2_printint.c debugging function that is used by pcre2test, + src/pcre2_fuzzsupport.c function for (optional) fuzzing support + + src/config.h.in template for config.h, when built by "configure" + src/pcre2.h.in template for pcre2.h when built by "configure" + src/pcre2posix.h header for the external POSIX wrapper API + src/pcre2_compile.h header for internal use + src/pcre2_internal.h header for internal use + src/pcre2_intmodedep.h a mode-specific internal header + src/pcre2_jit_char_inc.h header used by JIT + src/pcre2_jit_neon_inc.h header used by JIT + src/pcre2_jit_simd_inc.h 
header used by JIT + src/pcre2_ucp.h header for Unicode property handling + src/pcre2_util.h header for internal utils + + deps/sljit/sljit_src/* source files for the JIT compiler + +(B) Source files for programs that use PCRE2: + + src/pcre2demo.c simple demonstration of coding calls to PCRE2 + src/pcre2grep.c source of a grep utility that uses PCRE2 + src/pcre2test.c comprehensive test program + src/pcre2_jit_test.c JIT test program + src/pcre2posix_test.c POSIX wrapper API test program + +(C) Auxiliary files: + + AUTHORS.md information about the authors of PCRE2 + ChangeLog log of changes to the code + HACKING some notes about the internals of PCRE2 + INSTALL generic installation instructions + LICENCE.md conditions for the use of PCRE2 + COPYING the same, using GNU's standard name + SECURITY.md information on reporting vulnerabilities + Makefile.in ) template for Unix Makefile, which is built by + ) "configure" + Makefile.am ) the automake input that was used to create + ) Makefile.in + NEWS important changes in this release + NON-AUTOTOOLS-BUILD notes on building PCRE2 without using autotools + README this file + RunTest a Unix shell script for running tests + RunGrepTest a Unix shell script for pcre2grep tests + RunTest.bat a Windows batch file for running tests + RunGrepTest.bat a Windows batch file for pcre2grep tests + aclocal.m4 m4 macros (generated by "aclocal") + m4/* m4 macros (used by autoconf) + configure a configuring shell script (built by autoconf) + configure.ac ) the autoconf input that was used to build + ) "configure" and config.h + doc/*.3 man page sources for PCRE2 + doc/*.1 man page sources for pcre2grep and pcre2test + doc/html/* HTML documentation + doc/pcre2.txt plain text version of the man pages + doc/pcre2-config.txt plain text documentation of pcre2-config script + doc/pcre2grep.txt plain text documentation of grep utility program + doc/pcre2test.txt plain text documentation of test program + libpcre2-8.pc.in template for 
libpcre2-8.pc for pkg-config + libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config + libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config + libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config + ar-lib ) + config.guess ) + config.sub ) + depcomp ) helper tools generated by libtool and + compile ) automake, used internally by ./configure + install-sh ) + ltmain.sh ) + missing ) + test-driver ) + perltest.sh Script for running a Perl test program + pcre2-config.in source of script which retains PCRE2 information + testdata/testinput* test data for main library tests + testdata/testoutput* expected test results + testdata/grep* input and output for pcre2grep tests + testdata/* other supporting test files + +(D) Auxiliary files for CMake support + + cmake/COPYING-CMAKE-SCRIPTS + cmake/FindEditline.cmake + cmake/FindReadline.cmake + cmake/pcre2-config-version.cmake.in + cmake/pcre2-config.cmake.in + CMakeLists.txt + config-cmake.h.in + +(E) Auxiliary files for building PCRE2 "by hand" + + src/pcre2.h.generic ) a version of the public PCRE2 header file + ) for use in non-"configure" environments + src/config.h.generic ) a version of config.h for use in non-"configure" + ) environments + +(F) Auxiliary files for building PCRE2 using other build systems + + BUILD.bazel ) + MODULE.bazel ) files used by the Bazel build system + WORKSPACE.bazel ) + build.zig file used by zig's build system + +(G) Auxiliary files for building PCRE2 under OpenVMS + + vms/configure.com ) + vms/openvms_readme.txt ) These files were contributed by a PCRE2 user. + vms/pcre2.h_patch ) + vms/stdint.h ) + +============================== +Last updated: 18 December 2024 +============================== + diff --git a/3rd/pcre2/doc/html/index.html b/3rd/pcre2/doc/html/index.html new file mode 100644 index 00000000..2d81b678 --- /dev/null +++ b/3rd/pcre2/doc/html/index.html @@ -0,0 +1,327 @@ + + + +PCRE2 specification + + +

Perl-compatible Regular Expressions (revised API: PCRE2)

+

+The HTML documentation for PCRE2 consists of a number of pages that are listed +below in alphabetical order. If you are new to PCRE2, please read the first one +first. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2  Introductory page
pcre2-config  Information about the installation configuration
pcre2api  PCRE2's native API
pcre2build  Building PCRE2
pcre2callout  The callout facility
pcre2compat  Compatibility with Perl
pcre2convert  Experimental foreign pattern conversion functions
pcre2demo  A demonstration C program that uses the PCRE2 library
pcre2grep  The pcre2grep command
pcre2jit  Discussion of the just-in-time optimization support
pcre2limits  Details of size and other limits
pcre2matching  Discussion of the two matching algorithms
pcre2partial  Using PCRE2 for partial matching
pcre2pattern  Specification of the regular expressions supported by PCRE2
pcre2perform  Some comments on performance
pcre2posix  The POSIX API to the PCRE2 8-bit library
pcre2sample  Discussion of the pcre2demo program
pcre2serialize  Serializing functions for saving precompiled patterns
pcre2syntax  Syntax quick-reference summary
pcre2test  The pcre2test command for testing PCRE2
pcre2unicode  Discussion of Unicode and UTF-8/UTF-16/UTF-32 support
+ +

+There are also individual pages that summarize the interface for each function +in the library. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2_callout_enumerate  Enumerate callouts in a compiled pattern
pcre2_code_copy  Copy a compiled pattern
pcre2_code_copy_with_tables  Copy a compiled pattern and its character tables
pcre2_code_free  Free a compiled pattern
pcre2_compile  Compile a regular expression pattern
pcre2_compile_context_copy  Copy a compile context
pcre2_compile_context_create  Create a compile context
pcre2_compile_context_free  Free a compile context
pcre2_config  Show build-time configuration options
pcre2_convert_context_copy  Copy a convert context
pcre2_convert_context_create  Create a convert context
pcre2_convert_context_free  Free a convert context
pcre2_converted_pattern_free  Free converted foreign pattern
pcre2_dfa_match  Match a compiled pattern to a subject string + (DFA algorithm; not Perl compatible)
pcre2_general_context_copy  Copy a general context
pcre2_general_context_create  Create a general context
pcre2_general_context_free  Free a general context
pcre2_get_error_message  Get textual error message for error number
pcre2_get_mark  Get a (*MARK) name
pcre2_get_match_data_size  Get the size of a match data block
pcre2_get_ovector_count  Get the ovector count
pcre2_get_ovector_pointer  Get a pointer to the ovector
pcre2_get_startchar  Get the starting character offset
pcre2_jit_compile  Process a compiled pattern with the JIT compiler
pcre2_jit_free_unused_memory  Free unused JIT memory
pcre2_jit_match  Fast path interface to JIT matching
pcre2_jit_stack_assign  Assign stack for JIT matching
pcre2_jit_stack_create  Create a stack for JIT matching
pcre2_jit_stack_free  Free a JIT matching stack
pcre2_maketables  Build character tables in current locale
pcre2_maketables_free  Free character tables
pcre2_match  Match a compiled pattern to a subject string + (Perl compatible)
pcre2_match_context_copy  Copy a match context
pcre2_match_context_create  Create a match context
pcre2_match_context_free  Free a match context
pcre2_match_data_create  Create a match data block
pcre2_match_data_create_from_pattern  Create a match data block getting size from pattern
pcre2_match_data_free  Free a match data block
pcre2_pattern_convert  Experimental foreign pattern converter
pcre2_pattern_info  Extract information about a pattern
pcre2_serialize_decode  Decode serialized compiled patterns
pcre2_serialize_encode  Serialize compiled patterns for save/restore
pcre2_serialize_free  Free serialized compiled patterns
pcre2_serialize_get_number_of_codes  Get number of serialized compiled patterns
pcre2_set_bsr  Set \R convention
pcre2_set_callout  Set up a callout function
pcre2_set_character_tables  Set character tables
pcre2_set_compile_extra_options  Set compile time extra options
pcre2_set_compile_recursion_guard  Set up a compile recursion guard function
pcre2_set_depth_limit  Set the match backtracking depth limit
pcre2_set_glob_escape  Set glob escape character
pcre2_set_glob_separator  Set glob separator character
pcre2_set_heap_limit  Set the match backtracking heap limit
pcre2_set_match_limit  Set the match limit
pcre2_set_max_pattern_compiled_length  Set the maximum length of a compiled pattern
pcre2_set_max_pattern_length  Set the maximum length of a pattern
pcre2_set_max_varlookbehind  Set the maximum match length for a variable-length lookbehind
pcre2_set_newline  Set the newline convention
pcre2_set_offset_limit  Set the offset limit
pcre2_set_optimize  Set an optimization directive
pcre2_set_parens_nest_limit  Set the parentheses nesting limit
pcre2_set_recursion_limit  Obsolete: use pcre2_set_depth_limit
pcre2_set_recursion_memory_management  Obsolete function that (from 10.30 onwards) does nothing
pcre2_set_substitute_callout  Set a substitution callout function
pcre2_set_substitute_case_callout  Set a substitution case callout function
pcre2_substitute  Match a compiled pattern to a subject string and do + substitutions
pcre2_substring_copy_byname  Extract named substring into given buffer
pcre2_substring_copy_bynumber  Extract numbered substring into given buffer
pcre2_substring_free  Free extracted substring
pcre2_substring_get_byname  Extract named substring into new memory
pcre2_substring_get_bynumber  Extract numbered substring into new memory
pcre2_substring_length_byname  Find length of named substring
pcre2_substring_length_bynumber  Find length of numbered substring
pcre2_substring_list_free  Free list of extracted substrings
pcre2_substring_list_get  Extract all substrings into new memory
pcre2_substring_nametable_scan  Find table entries for given string name
pcre2_substring_number_from_name  Convert captured string name to number
+ + + diff --git a/3rd/pcre2/doc/html/pcre2-config.html b/3rd/pcre2/doc/html/pcre2-config.html new file mode 100644 index 00000000..b71d7602 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2-config.html @@ -0,0 +1,102 @@ + + +pcre2-config specification + + +

pcre2-config man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+pcre2-config [--prefix] [--exec-prefix] [--version] + [--libs8] [--libs16] [--libs32] [--libs-posix] + [--cflags] [--cflags-posix] +

+
DESCRIPTION
+

+pcre2-config returns the configuration of the installed PCRE2 libraries +and the options required to compile a program to use them. Some of the options +apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are +not available for libraries that have not been built. If an unavailable option +is encountered, the "usage" information is output. +

+
OPTIONS
+

+--prefix +Writes the directory prefix used in the PCRE2 installation for architecture +independent files (/usr on many systems, /usr/local on some +systems) to the standard output. +

+

+--exec-prefix +Writes the directory prefix used in the PCRE2 installation for architecture +dependent files (normally the same as --prefix) to the standard output. +

+

+--version +Writes the version number of the installed PCRE2 libraries to the standard +output. +

+

+--libs8 +Writes to the standard output the command line options required to link +with the 8-bit PCRE2 library (-lpcre2-8 on many systems). +

+

+--libs16 +Writes to the standard output the command line options required to link +with the 16-bit PCRE2 library (-lpcre2-16 on many systems). +

+

+--libs32 +Writes to the standard output the command line options required to link +with the 32-bit PCRE2 library (-lpcre2-32 on many systems). +

+

+--libs-posix +Writes to the standard output the command line options required to link with +PCRE2's POSIX API wrapper library (-lpcre2-posix -lpcre2-8 on many +systems). +

+

+--cflags +Writes to the standard output the command line options required to compile +files that use PCRE2 (this may include some -I options, but is blank on +many systems). +

+

+--cflags-posix +Writes to the standard output the command line options required to compile +files that use PCRE2's POSIX API wrapper library (this may include some +-I options, but is blank on many systems). +

+
SEE ALSO
+

+pcre2(3) +

+
AUTHOR
+

+This manual page was originally written by Mark Baker for the Debian GNU/Linux +system. It has been subsequently revised as a generic PCRE2 man page. +

+
REVISION
+

+Last updated: 28 September 2014 +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2.html b/3rd/pcre2/doc/html/pcre2.html new file mode 100644 index 00000000..e72b6b1c --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2.html @@ -0,0 +1,214 @@ + + +pcre2 specification + + +

pcre2 man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
INTRODUCTION
+

+PCRE2 is the name used for a revised API for the PCRE library, which is a set +of functions, written in C, that implement regular expression pattern matching +using the same syntax and semantics as Perl, with just a few differences. After +nearly two decades, the limitations of the original API were making development +increasingly difficult. The new API is more extensible, and it was simplified +by abolishing the separate "study" optimizing function; in PCRE2, patterns are +automatically optimized where possible. Since forking from PCRE1, the code has +been extensively refactored and new features introduced. The old library is now +obsolete and is no longer maintained. +

+

+As well as Perl-style regular expression patterns, some features that appeared +in Python and the original PCRE before they appeared in Perl are available +using the Python syntax. There is also some support for one or two .NET and +Oniguruma syntax items, and there are options for requesting some minor changes +that give better ECMAScript (aka JavaScript) compatibility. +

+

+The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit, +or 32-bit code units, which means that up to three separate libraries may be +installed, one for each code unit size. The size of code unit is not related to +the bit size of the underlying hardware. In a 64-bit environment that also +supports 32-bit applications, versions of PCRE2 that are compiled in both +64-bit and 32-bit modes may be needed. +

+

+The original work to extend PCRE to 16-bit and 32-bit code units was done by +Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings +can be interpreted either as one character per code unit, or as UTF-encoded +Unicode, with support for Unicode general category properties. Unicode support +is optional at build time (but is the default). However, processing strings as +UTF code units must be enabled explicitly at run time. The version of Unicode +in use can be discovered by running +

+  pcre2test -C
+
+

+

+The three libraries contain identical sets of functions, with names ending in +_8, _16, or _32, respectively (for example, pcre2_compile_8()). However, +by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just +one code unit width can be written using generic names such as +pcre2_compile(), and the documentation is written assuming that this is +the case. +

+

+In addition to the Perl-compatible matching function, PCRE2 contains an +alternative function that matches the same compiled patterns in a different +way. In certain circumstances, the alternative function has some advantages. +For a discussion of the two matching algorithms, see the +pcre2matching +page. +

+

+Details of exactly which Perl regular expression features are and are not +supported by PCRE2 are given in separate documents. See the +pcre2pattern +and +pcre2compat +pages. There is a syntax summary in the +pcre2syntax +page. +

+

+Some features of PCRE2 can be included, excluded, or changed when the library +is built. The +pcre2_config() +function makes it possible for a client to discover which features are +available. The features themselves are described in the +pcre2build +page. Documentation about building PCRE2 for various operating systems can be +found in the +README +and +NON-AUTOTOOLS-BUILD +files in the source distribution. +

+

+The libraries contain a number of undocumented internal functions and data +tables that are used by more than one of the exported external functions, but +which are not intended for use by external callers. Their names all begin with +"_pcre2", which hopefully will not provoke any name clashes. In some +environments, it is possible to control which external symbols are exported +when a shared library is built, and in these cases the undocumented symbols are +not exported. +

+
SECURITY CONSIDERATIONS
+

+If you are using PCRE2 in a non-UTF application that permits users to supply +arbitrary patterns for compilation, you should be aware of a feature that +allows users to turn on UTF support from within a pattern. For example, an +8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets +patterns and subjects as strings of UTF-8 code units instead of individual +8-bit characters. This causes both the pattern and any data against which it is +matched to be checked for UTF-8 validity. If the data string is very long, such +a check might use sufficiently many resources as to cause your application to +lose performance. +

+

+One way of guarding against this possibility is to use the +pcre2_pattern_info() function to check the compiled pattern's options for +PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling +pcre2_compile(). This causes a compile time error if the pattern contains +a UTF-setting sequence. +

+

+The use of Unicode properties for character types such as \d can also be +enabled from within the pattern, by specifying "(*UCP)". This feature can be +disallowed by setting the PCRE2_NEVER_UCP option. +

+

+If your application is one that supports UTF, be aware that validity checking +can take time. If the same data string is to be matched many times, you can use +the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid +running redundant checks. +

+

+The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead to +problems, because it may leave the current matching point in the middle of a +multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an +application to lock out the use of \C, causing a compile-time error if it is +encountered. It is also possible to build PCRE2 with the use of \C permanently +disabled. +

+

+Another way that performance can be hit is by running a pattern that has a very +large search tree against a string that will never match. Nested unlimited +repeats in a pattern are a common example. PCRE2 provides some protection +against this: see the pcre2_set_match_limit() function in the +pcre2api +page. There is a similar function called pcre2_set_depth_limit() that can +be used to restrict the amount of memory that is used. +

+
USER DOCUMENTATION
+

+The user documentation for PCRE2 comprises a number of different sections. In +the "man" format, each of these is a separate "man page". In the HTML format, +each is a separate page, linked from the index page. In the plain text format, +the descriptions of the pcre2grep and pcre2test programs are in +files called pcre2grep.txt and pcre2test.txt, respectively. The +remaining sections, except for the pcre2demo section (which is a program +listing), and the short pages for individual functions, are concatenated in +pcre2.txt, for ease of searching. The sections are as follows: +

+  pcre2              this document
+  pcre2-config       show PCRE2 installation configuration information
+  pcre2api           details of PCRE2's native C API
+  pcre2build         building PCRE2
+  pcre2callout       details of the pattern callout feature
+  pcre2compat        discussion of Perl compatibility
+  pcre2convert       details of pattern conversion functions
+  pcre2demo          a demonstration C program that uses PCRE2
+  pcre2grep          description of the pcre2grep command (8-bit only)
+  pcre2jit           discussion of just-in-time optimization support
+  pcre2limits        details of size and other limits
+  pcre2matching      discussion of the two matching algorithms
+  pcre2partial       details of the partial matching facility
+  pcre2pattern       syntax and semantics of supported regular expression patterns
+  pcre2perform       discussion of performance issues
+  pcre2posix         the POSIX-compatible C API for the 8-bit library
+  pcre2sample        discussion of the pcre2demo program
+  pcre2serialize     details of pattern serialization
+  pcre2syntax        quick syntax reference
+  pcre2test          description of the pcre2test command
+  pcre2unicode       discussion of Unicode and UTF support
+
+In the "man" and HTML formats, there is also a short page for each C library +function, listing its arguments and results. +

+
AUTHORS
+

+The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg. +

+

+PCRE2 was written by Philip Hazel, of the University Computing Service, +Cambridge, England. Many others have also contributed. +

+

+To contact the maintainers, please use the GitHub issues tracker or PCRE2 +mailing list, as described at the project page: +https://github.com/PCRE2Project/pcre2 +

+
REVISION
+

+Last updated: 18 December 2024 +
+Copyright © 1997-2021 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_callout_enumerate.html b/3rd/pcre2/doc/html/pcre2_callout_enumerate.html new file mode 100644 index 00000000..505ea7b2 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_callout_enumerate.html @@ -0,0 +1,63 @@ + + +pcre2_callout_enumerate specification + + +

pcre2_callout_enumerate man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function scans a compiled regular expression and calls the callback() +function for each callout within the pattern. The yield of the function is zero +for success and non-zero otherwise. The arguments are: +

+  code           Points to the compiled pattern
+  callback       The callback function
+  callout_data   User data that is passed to the callback
+
+The callback() function is passed a pointer to a data block containing +the following fields (not necessarily in this order): +
+  uint32_t   version                Block version number
+  uint32_t   callout_number         Number for numbered callouts
+  PCRE2_SIZE pattern_position       Offset to next item in pattern
+  PCRE2_SIZE next_item_length       Length of next item in pattern
+  PCRE2_SIZE callout_string_offset  Offset to string within pattern
+  PCRE2_SIZE callout_string_length  Length of callout string
+  PCRE2_SPTR callout_string         Points to callout string or is NULL
+
+The second argument passed to the callback() function is the callout data +that was passed to pcre2_callout_enumerate(). The callback() +function must return zero for success. Any other value causes the pattern scan +to stop, with the value being passed back as the result of +pcre2_callout_enumerate(). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_code_copy.html b/3rd/pcre2/doc/html/pcre2_code_copy.html new file mode 100644 index 00000000..667d7b7f --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_code_copy.html @@ -0,0 +1,43 @@ + + +pcre2_code_copy specification + + +

pcre2_code_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_code *pcre2_code_copy(const pcre2_code *code); +

+
+DESCRIPTION +
+

+This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +pcre2_jit_compile(), the copy can be used only for non-JIT matching. The +pointer to the character tables is copied, not the tables themselves (see +pcre2_code_copy_with_tables()). The yield of the function is NULL if +code is NULL or if sufficient memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_code_copy_with_tables.html b/3rd/pcre2/doc/html/pcre2_code_copy_with_tables.html new file mode 100644 index 00000000..67b2e1ff --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_code_copy_with_tables.html @@ -0,0 +1,44 @@ + + +pcre2_code_copy_with_tables specification + + +

pcre2_code_copy_with_tables man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +

+
+DESCRIPTION +
+

+This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +pcre2_jit_compile(), the copy can be used only for non-JIT matching. +Unlike pcre2_code_copy(), a separate copy of the character tables is also +made, with the new code pointing to it. This memory will be automatically freed +when pcre2_code_free() is called. The yield of the function is NULL if +code is NULL or if sufficient memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_code_free.html b/3rd/pcre2/doc/html/pcre2_code_free.html new file mode 100644 index 00000000..ff302fcd --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_code_free.html @@ -0,0 +1,42 @@ + + +pcre2_code_free specification + + +

pcre2_code_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_code_free(pcre2_code *code); +

+
+DESCRIPTION +
+

+If code is NULL, this function does nothing. Otherwise, code must +point to a compiled pattern. This function frees its memory, including any +memory used by the JIT compiler. If the compiled pattern was created by a call +to pcre2_code_copy_with_tables(), the memory for the character tables is +also freed. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_compile.html b/3rd/pcre2/doc/html/pcre2_compile.html new file mode 100644 index 00000000..ee933f38 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_compile.html @@ -0,0 +1,120 @@ + + +pcre2_compile specification + + +

pcre2_compile man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); +

+
+DESCRIPTION +
+

+This function compiles a regular expression pattern into an internal form. Its +arguments are: +

+  pattern       A string containing the expression to be compiled
+  length        The length of the string or PCRE2_ZERO_TERMINATED
+  options       Primary option bits
+  errorcode     Where to put an error code
+  erroroffset   Where to put an error offset
+  ccontext      Pointer to a compile context or NULL
+
+The length of the pattern and any error offset that is returned are in code +units, not characters. A NULL pattern with zero length is treated as an empty +string. A compile context is needed only if you want to provide custom memory +allocation functions, or to provide an external function for system stack size +checking (see pcre2_set_compile_recursion_guard()), or to change one or +more of these parameters: +
+  What \R matches (Unicode newlines, or CR, LF, CRLF only);
+  PCRE2's character tables;
+  The newline character sequence;
+  The compile time nested parentheses limit;
+  The maximum pattern length (in code units) that is allowed;
+  The additional options bits.
+
+The primary option bits are: +
+  PCRE2_ANCHORED           Force pattern anchoring
+  PCRE2_ALLOW_EMPTY_CLASS  Allow empty classes
+  PCRE2_ALT_BSUX           Alternative handling of \u, \U, and \x
+  PCRE2_ALT_CIRCUMFLEX     Alternative handling of ^ in multiline mode
+  PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax
+  PCRE2_ALT_VERBNAMES      Process backslashes in verb names
+  PCRE2_AUTO_CALLOUT       Compile automatic callouts
+  PCRE2_CASELESS           Do caseless matching
+  PCRE2_DOLLAR_ENDONLY     $ not to match newline at end
+  PCRE2_DOTALL             . matches anything including NL
+  PCRE2_DUPNAMES           Allow duplicate names for subpatterns
+  PCRE2_ENDANCHORED        Pattern can match only at end of subject
+  PCRE2_EXTENDED           Ignore white space and # comments
+  PCRE2_FIRSTLINE          Force matching to be before newline
+  PCRE2_LITERAL            Pattern characters are all literal
+  PCRE2_MATCH_INVALID_UTF  Enable support for matching invalid UTF
+  PCRE2_MATCH_UNSET_BACKREF  Match unset backreferences
+  PCRE2_MULTILINE          ^ and $ match newlines within data
+  PCRE2_NEVER_BACKSLASH_C  Lock out the use of \C in patterns
+  PCRE2_NEVER_UCP          Lock out PCRE2_UCP, e.g. via (*UCP)
+  PCRE2_NEVER_UTF          Lock out PCRE2_UTF, e.g. via (*UTF)
+  PCRE2_NO_AUTO_CAPTURE    Disable numbered capturing paren-
+                            theses (named ones available)
+  PCRE2_NO_AUTO_POSSESS    Disable auto-possessification
+  PCRE2_NO_DOTSTAR_ANCHOR  Disable automatic anchoring for .*
+  PCRE2_NO_START_OPTIMIZE  Disable match-time start optimizations
+  PCRE2_NO_UTF_CHECK       Do not check the pattern for UTF validity
+                             (only relevant if PCRE2_UTF is set)
+  PCRE2_UCP                Use Unicode properties for \d, \w, etc.
+  PCRE2_UNGREEDY           Invert greediness of quantifiers
+  PCRE2_USE_OFFSET_LIMIT   Enable offset limit for unanchored matching
+  PCRE2_UTF                Treat pattern and subjects as UTF strings
+
+PCRE2 must be built with Unicode support (the default) in order to use +PCRE2_UTF, PCRE2_UCP and related options. +

+

+Additional options may be set in the compile context via the +pcre2_set_compile_extra_options +function. +

+

+If either of errorcode or erroroffset is NULL, the function returns +NULL immediately. Otherwise, the yield of this function is a pointer to a +private data structure that contains the compiled pattern, or NULL if an error +was detected. In the error case, a text error message can be obtained by +passing the value returned via the errorcode argument to the +pcre2_get_error_message() function. The offset (in code units) where the +error was encountered is returned via the erroroffset argument. +

+

+If there is no error, the value returned via errorcode yields the message +"no error" when passed to pcre2_get_error_message(), and the value returned +via erroroffset is zero. +

+

+There is a complete description of the PCRE2 native API, with more detail on +each option, in the +pcre2api +page, and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_compile_context_copy.html b/3rd/pcre2/doc/html/pcre2_compile_context_copy.html new file mode 100644 index 00000000..9e9884b8 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_compile_context_copy.html @@ -0,0 +1,41 @@ + + +pcre2_compile_context_copy specification + + +

pcre2_compile_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); +

+
+DESCRIPTION +
+

+This function makes a new copy of a compile context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_compile_context_create.html b/3rd/pcre2/doc/html/pcre2_compile_context_create.html new file mode 100644 index 00000000..5eacd4ec --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_compile_context_create.html @@ -0,0 +1,42 @@ + + +pcre2_compile_context_create specification + + +

pcre2_compile_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates and initializes a new compile context. If its argument is +NULL, malloc() is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_compile_context_free.html b/3rd/pcre2/doc/html/pcre2_compile_context_free.html new file mode 100644 index 00000000..b4159b11 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_compile_context_free.html @@ -0,0 +1,41 @@ + + +pcre2_compile_context_free specification + + +

pcre2_compile_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_compile_context_free(pcre2_compile_context *ccontext); +

+
+DESCRIPTION +
+

+This function frees the memory occupied by a compile context, using the memory +freeing function from the general context with which it was created, or +free() if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_config.html b/3rd/pcre2/doc/html/pcre2_config.html new file mode 100644 index 00000000..f05bd062 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_config.html @@ -0,0 +1,84 @@ + + +pcre2_config specification + + +

pcre2_config man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_config(uint32_t what, void *where); +

+
+DESCRIPTION +
+

+This function makes it possible for a client program to find out which optional +features are available in the version of the PCRE2 library it is using. The +arguments are as follows: +

+  what     A code specifying what information is required
+  where    Points to where to put the information
+
+If where is NULL, the function returns the amount of memory needed for +the requested information. When the information is a string, the value is in +code units; for other types of data it is in bytes. +

+

+If where is not NULL, for PCRE2_CONFIG_JITTARGET, +PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a +buffer that is large enough to hold the string. For all other codes it must +point to a uint32_t integer variable. The available codes are: +

+  PCRE2_CONFIG_BSR             Indicates what \R matches by default:
+                                 PCRE2_BSR_UNICODE
+                                 PCRE2_BSR_ANYCRLF
+  PCRE2_CONFIG_COMPILED_WIDTHS Which of 8/16/32 support was compiled
+  PCRE2_CONFIG_DEPTHLIMIT      Default backtracking depth limit
+  PCRE2_CONFIG_HEAPLIMIT       Default heap memory limit
+  PCRE2_CONFIG_JIT             Availability of just-in-time compiler support (1=yes 0=no)
+  PCRE2_CONFIG_JITTARGET       Information (a string) about the target architecture for the JIT compiler
+  PCRE2_CONFIG_LINKSIZE        Configured internal link size (2, 3, 4)
+  PCRE2_CONFIG_MATCHLIMIT      Default internal resource limit
+  PCRE2_CONFIG_NEVER_BACKSLASH_C  Whether or not \C is disabled
+  PCRE2_CONFIG_NEWLINE         Code for the default newline sequence:
+                                 PCRE2_NEWLINE_CR
+                                 PCRE2_NEWLINE_LF
+                                 PCRE2_NEWLINE_CRLF
+                                 PCRE2_NEWLINE_ANY
+                                 PCRE2_NEWLINE_ANYCRLF
+                                 PCRE2_NEWLINE_NUL
+  PCRE2_CONFIG_PARENSLIMIT     Default parentheses nesting limit
+  PCRE2_CONFIG_RECURSIONLIMIT  Obsolete: use PCRE2_CONFIG_DEPTHLIMIT
+  PCRE2_CONFIG_STACKRECURSE    Obsolete: always returns 0
+  PCRE2_CONFIG_UNICODE         Availability of Unicode support (1=yes 0=no)
+  PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string)
+  PCRE2_CONFIG_VERSION         The PCRE2 version (a string)
+
+The function yields a non-negative value on success or the negative value +PCRE2_ERROR_BADOPTION otherwise. This is also the result for the +PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is +requested, the function returns the number of code units used, including the +terminating zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_convert_context_copy.html b/3rd/pcre2/doc/html/pcre2_convert_context_copy.html new file mode 100644 index 00000000..3c44ac6d --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_convert_context_copy.html @@ -0,0 +1,40 @@ + + +pcre2_convert_context_copy specification + + +

pcre2_convert_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It makes a new copy of a convert context, using the memory allocation function +that was used for the original context. The result is NULL if the memory cannot +be obtained. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_convert_context_create.html b/3rd/pcre2/doc/html/pcre2_convert_context_create.html new file mode 100644 index 00000000..25647809 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_convert_context_create.html @@ -0,0 +1,41 @@ + + +pcre2_convert_context_create specification + + +

pcre2_convert_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It creates and initializes a new convert context. If its argument is +NULL, malloc() is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_convert_context_free.html b/3rd/pcre2/doc/html/pcre2_convert_context_free.html new file mode 100644 index 00000000..e9b142bf --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_convert_context_free.html @@ -0,0 +1,40 @@ + + +pcre2_convert_context_free specification + + +

pcre2_convert_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_convert_context_free(pcre2_convert_context *cvcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a convert context, using the memory +freeing function from the general context with which it was created, or +free() if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_converted_pattern_free.html b/3rd/pcre2/doc/html/pcre2_converted_pattern_free.html new file mode 100644 index 00000000..01d28d7a --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_converted_pattern_free.html @@ -0,0 +1,40 @@ + + +pcre2_converted_pattern_free specification + + +

pcre2_converted_pattern_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a converted pattern that was obtained by +calling pcre2_pattern_convert() with arguments that caused it to place +the converted pattern into newly obtained heap memory. If the argument is NULL, +the function returns immediately without doing anything. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_dfa_match.html b/3rd/pcre2/doc/html/pcre2_dfa_match.html new file mode 100644 index 00000000..0ae428c1 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_dfa_match.html @@ -0,0 +1,86 @@ + + +pcre2_dfa_match specification + + +

pcre2_dfa_match man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression against a given subject +string, using an alternative matching algorithm that scans the subject string +just once (except when processing lookaround assertions). This function is +not Perl-compatible (the Perl-compatible matching function is +pcre2_match()). The arguments for this function are: +

+  code         Points to the compiled pattern
+  subject      Points to the subject string
+  length       Length of the subject string
+  startoffset  Offset in the subject at which to start matching
+  options      Option bits
+  match_data   Points to a match data block, for results
+  mcontext     Points to a match context, or is NULL
+  workspace    Points to a vector of ints used as working space
+  wscount      Number of elements in the vector
+
+The size of the output vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of parentheses in the +pattern. Using pcre2_match_data_create_from_pattern() to create the match +data block is therefore not advisable when using this function. +

+

+A match context is needed only if you want to set up a callout function or +specify the heap limit, the match limit, or the recursion depth limit. The +length and startoffset values are in code units, not characters. The +options are: +

+  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
+  PCRE2_ENDANCHORED       Pattern can match only at end of subject
+  PCRE2_NOTBOL            Subject is not the beginning of a line
+  PCRE2_NOTEOL            Subject is not the end of a line
+  PCRE2_NOTEMPTY          An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART  An empty string at the start of the subject is not a valid match
+  PCRE2_NO_UTF_CHECK      Do not check the subject for UTF validity (only relevant if PCRE2_UTF
+                           was set at compile time)
+  PCRE2_PARTIAL_HARD      Return PCRE2_ERROR_PARTIAL for a partial match even if there is a full match
+  PCRE2_PARTIAL_SOFT      Return PCRE2_ERROR_PARTIAL for a partial match if no full matches are found
+  PCRE2_DFA_RESTART       Restart after a partial match
+  PCRE2_DFA_SHORTEST      Return only the shortest match
+
+There are restrictions on what may appear in a pattern when using this matching +function. Details are given in the +pcre2matching +documentation. For details of partial matching, see the +pcre2partial +page. There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_general_context_copy.html b/3rd/pcre2/doc/html/pcre2_general_context_copy.html new file mode 100644 index 00000000..00185346 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_general_context_copy.html @@ -0,0 +1,42 @@ + + +pcre2_general_context_copy specification + + +

pcre2_general_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function makes a new copy of a general context, using the memory +allocation functions in the context, if set, to get the necessary memory. +Otherwise malloc() is used. The result is NULL if the memory cannot be +obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_general_context_create.html b/3rd/pcre2/doc/html/pcre2_general_context_create.html new file mode 100644 index 00000000..a1a165d7 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_general_context_create.html @@ -0,0 +1,44 @@ + + +pcre2_general_context_create specification + + +

pcre2_general_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); +

+
+DESCRIPTION +
+

+This function creates and initializes a general context. The arguments define +custom memory management functions and a data value that is passed to them when +they are called. The private_malloc() function is used to get memory for +the context. If either of the first two arguments is NULL, the system memory +management function is used. The result is NULL if no memory could be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_general_context_free.html b/3rd/pcre2/doc/html/pcre2_general_context_free.html new file mode 100644 index 00000000..9f335f57 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_general_context_free.html @@ -0,0 +1,40 @@ + + +pcre2_general_context_free specification + + +

pcre2_general_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_general_context_free(pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function frees the memory occupied by a general context, using the memory +freeing function within the context, if set. If the argument is NULL, the +function returns immediately without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_error_message.html b/3rd/pcre2/doc/html/pcre2_get_error_message.html new file mode 100644 index 00000000..70057600 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_error_message.html @@ -0,0 +1,51 @@ + + +pcre2_get_error_message specification + + +

pcre2_get_error_message man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +

+
+DESCRIPTION +
+

+This function provides a textual error message for each PCRE2 error code. +Compilation errors are positive numbers; UTF formatting errors and matching +errors are negative numbers. The arguments are: +

+  errorcode   an error code (positive or negative)
+  buffer      where to put the message
+  bufflen     the length of the buffer (code units)
+
+The function returns the length of the message in code units, excluding the +trailing zero, or the negative error code PCRE2_ERROR_NOMEMORY if the buffer is +too small. In this case, the returned message is truncated (but still with a +trailing zero). If errorcode does not contain a recognized error code +number, the negative value PCRE2_ERROR_BADDATA is returned. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_mark.html b/3rd/pcre2/doc/html/pcre2_get_mark.html new file mode 100644 index 00000000..88e63269 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_mark.html @@ -0,0 +1,47 @@ + + +pcre2_get_mark specification + + +

pcre2_get_mark man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+After a call of pcre2_match() that was passed the match block that is +this function's argument, this function returns a pointer to the last (*MARK), +(*PRUNE), or (*THEN) name that was encountered during the matching process. The +name is zero-terminated, and is within the compiled pattern. The length of the +name is in the preceding code unit. If no name is available, NULL is returned. +

+

+After a successful match, the name that is returned is the last one on the +matching path. After a failed match or a partial match, the last encountered +name is returned. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_match_data_heapframes_size.html b/3rd/pcre2/doc/html/pcre2_get_match_data_heapframes_size.html new file mode 100644 index 00000000..3c705c61 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_match_data_heapframes_size.html @@ -0,0 +1,40 @@ + + +pcre2_get_match_data_heapframes_size specification + + +

pcre2_get_match_data_heapframes_size man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns the size, in bytes, of the heapframes data block that is +owned by its argument. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_match_data_size.html b/3rd/pcre2/doc/html/pcre2_get_match_data_size.html new file mode 100644 index 00000000..113ecaab --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_match_data_size.html @@ -0,0 +1,39 @@ + + +pcre2_get_match_data_size specification + + +

pcre2_get_match_data_size man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns the size, in bytes, of the match data block that is its +argument. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_ovector_count.html b/3rd/pcre2/doc/html/pcre2_get_ovector_count.html new file mode 100644 index 00000000..05aacb6d --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_ovector_count.html @@ -0,0 +1,39 @@ + + +pcre2_get_ovector_count specification + + +

pcre2_get_ovector_count man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns the number of pairs of offsets in the ovector that forms +part of the given match data block. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_ovector_pointer.html b/3rd/pcre2/doc/html/pcre2_get_ovector_pointer.html new file mode 100644 index 00000000..ff6317ef --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_ovector_pointer.html @@ -0,0 +1,40 @@ + + +pcre2_get_ovector_pointer specification + + +

pcre2_get_ovector_pointer man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+This function returns a pointer to the vector of offsets that forms part of the +given match data block. The number of pairs can be found by calling +pcre2_get_ovector_count(). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_get_startchar.html b/3rd/pcre2/doc/html/pcre2_get_startchar.html new file mode 100644 index 00000000..d2c28b2a --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_get_startchar.html @@ -0,0 +1,44 @@ + + +pcre2_get_startchar specification + + +

pcre2_get_startchar man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+After a successful call of pcre2_match() that was passed the match block +that is this function's argument, this function returns the code unit offset of +the character at which the successful match started. For a non-partial match, +this can be different to the value of ovector[0] if the pattern contains +the \K escape sequence. After a partial match, however, this value is always +the same as ovector[0] because \K does not affect the result of a +partial match. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_jit_compile.html b/3rd/pcre2/doc/html/pcre2_jit_compile.html new file mode 100644 index 00000000..791dd0c3 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_jit_compile.html @@ -0,0 +1,74 @@ + + +pcre2_jit_compile specification + + +

pcre2_jit_compile man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_jit_compile(pcre2_code *code, uint32_t options); +

+
+DESCRIPTION +
+

+This function requests JIT compilation, which, if the just-in-time compiler is +available, further processes a compiled pattern into machine code that executes +much faster than the pcre2_match() interpretive matching function. Full +details are given in the +pcre2jit +documentation. +

+

+The availability of JIT support can be tested by calling +pcre2_jit_compile() with a single option PCRE2_JIT_TEST_ALLOC (the +code argument is ignored, so a NULL value is accepted). Such a call +returns zero if JIT is available and has a working allocator. Otherwise +it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate +executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not +compiled. +

+

+Otherwise, the first argument must be a pointer that was returned by a +successful call to pcre2_compile(), and the second must contain one or +more of the following bits: +

+  PCRE2_JIT_COMPLETE      compile code for full matching
+  PCRE2_JIT_PARTIAL_SOFT  compile code for soft partial matching
+  PCRE2_JIT_PARTIAL_HARD  compile code for hard partial matching
+
+There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been +superseded by the pcre2_compile() option PCRE2_MATCH_INVALID_UTF. The old +option is deprecated and may be removed in the future. +

+

+The yield of the function when called with any of the three options above is 0 +for success, or a negative error code otherwise. In particular, +PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown +bit is set in options. The function can also return PCRE2_ERROR_NOMEMORY +if JIT is unable to allocate executable memory for the compiler, even if it was +because of a system security restriction. In a few cases, the function may +return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_jit_free_unused_memory.html b/3rd/pcre2/doc/html/pcre2_jit_free_unused_memory.html new file mode 100644 index 00000000..7f37e583 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_jit_free_unused_memory.html @@ -0,0 +1,43 @@ + + +pcre2_jit_free_unused_memory specification + + +

pcre2_jit_free_unused_memory man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function frees unused JIT executable memory. The argument is a general +context, for custom memory management, or NULL for standard memory management. +JIT memory allocation retains some memory in order to improve future JIT +compilation speed. In low memory conditions, +pcre2_jit_free_unused_memory() can be used to cause this memory to be +freed. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_jit_match.html b/3rd/pcre2/doc/html/pcre2_jit_match.html new file mode 100644 index 00000000..56144ff9 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_jit_match.html @@ -0,0 +1,70 @@ + + +pcre2_jit_match specification + + +

pcre2_jit_match man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression that has been successfully +processed by the JIT compiler against a given subject string, using a matching +algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and +it bypasses some of the sanity checks that pcre2_match() applies. +

+

+In UTF mode, the subject string is not checked for UTF validity. Unless +PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an +invalid UTF string results in undefined behaviour. Your program may crash or +loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should only call pcre2_jit_match() in UTF mode if you are sure the +subject is valid. +

+

+The arguments for pcre2_jit_match() are exactly the same as for +pcre2_match(), +except that the subject string must be specified with a length; +PCRE2_ZERO_TERMINATED is not supported. +

+

+The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported +options are ignored. +

+

+The return values are the same as for pcre2_match() plus +PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested +that was not compiled. For details of partial matching, see the +pcre2partial +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the JIT API in the +pcre2jit +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_jit_stack_assign.html b/3rd/pcre2/doc/html/pcre2_jit_stack_assign.html new file mode 100644 index 00000000..4b3abb90 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_jit_stack_assign.html @@ -0,0 +1,75 @@ + + +pcre2_jit_stack_assign specification + + +

pcre2_jit_stack_assign man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); +

+
+DESCRIPTION +
+

+This function provides control over the memory used by JIT as a run-time stack +when pcre2_match() or pcre2_jit_match() is called with a pattern +that has been successfully processed by the JIT compiler. The information that +determines which stack is used is put into a match context that is subsequently +passed to a matching function. The arguments of this function are: +

+  mcontext       a pointer to a match context
+  callback       a callback function
+  callback_data  a JIT stack or a value to be passed to the callback
+
+

+

+If mcontext is NULL, the function returns immediately, without doing +anything. +

+

+If callback is NULL and callback_data is NULL, an internal 32KiB +block on the machine stack is used. +

+

+If callback is NULL and callback_data is not NULL, +callback_data must be a valid JIT stack, the result of calling +pcre2_jit_stack_create(). +

+

+If callback is not NULL, it is called with callback_data as an +argument at the start of matching, in order to set up a JIT stack. If the +result is NULL, the internal 32KiB stack is used; otherwise the return value +must be a valid JIT stack, the result of calling +pcre2_jit_stack_create(). +

+

+You may safely use the same JIT stack for multiple patterns, as long as they +are all matched in the same thread. In a multithread application, each thread +must use its own JIT stack. For more details, see the +pcre2jit +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_jit_stack_create.html b/3rd/pcre2/doc/html/pcre2_jit_stack_create.html new file mode 100644 index 00000000..b9dc59d6 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_jit_stack_create.html @@ -0,0 +1,50 @@ + + +pcre2_jit_stack_create specification + + +

pcre2_jit_stack_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function is used to create a stack for use by the code compiled by the JIT +compiler. The first two arguments are a starting size for the stack, and a +maximum size to which it is allowed to grow. The final argument is a general +context, for memory allocation functions, or NULL for standard memory +allocation. The result can be passed to the JIT run-time code by calling +pcre2_jit_stack_assign() to associate the stack with a compiled pattern, +which can then be processed by pcre2_match() or pcre2_jit_match(). +A maximum stack size of 512KiB to 1MiB should be more than enough for any +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the +pcre2jit +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_jit_stack_free.html b/3rd/pcre2/doc/html/pcre2_jit_stack_free.html new file mode 100644 index 00000000..1d078d74 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_jit_stack_free.html @@ -0,0 +1,43 @@ + + +pcre2_jit_stack_free specification + + +

pcre2_jit_stack_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); +

+
+DESCRIPTION +
+

+This function is used to free a JIT stack that was created by +pcre2_jit_stack_create() when it is no longer needed. If the argument is +NULL, the function returns immediately without doing anything. For more +details, see the +pcre2jit +page. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_maketables.html b/3rd/pcre2/doc/html/pcre2_maketables.html new file mode 100644 index 00000000..19636545 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_maketables.html @@ -0,0 +1,48 @@ + + +pcre2_maketables specification + + +

pcre2_maketables man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function builds a set of character tables for character code points that +are less than 256. These can be passed to pcre2_compile() in a compile +context in order to override the internal, built-in tables (which were either +defaulted or made by pcre2_maketables() when PCRE2 was compiled). See the +pcre2_set_character_tables() +page. You might want to do this if you are using a non-standard locale. +

+

+If the argument is NULL, malloc() is used to get memory for the tables. +Otherwise it must point to a general context, which can supply pointers to a +custom memory manager. The function yields a pointer to the tables. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_maketables_free.html b/3rd/pcre2/doc/html/pcre2_maketables_free.html new file mode 100644 index 00000000..7316ab25 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_maketables_free.html @@ -0,0 +1,44 @@ + + +pcre2_maketables_free specification + + +

pcre2_maketables_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +

+
+DESCRIPTION +
+

+This function discards a set of character tables that were created by a call +to +pcre2_maketables(). +

+

+The gcontext parameter should match what was used in that call to +account for any custom allocators that might be in use; if it is NULL +the system free() is used. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match.html b/3rd/pcre2/doc/html/pcre2_match.html new file mode 100644 index 00000000..5584ae3d --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match.html @@ -0,0 +1,87 @@ + + +pcre2_match specification + + +

pcre2_match man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It returns +offsets to what it has matched and to captured substrings via the +match_data block, which can be processed by functions with names that +start with pcre2_get_ovector_...() or pcre2_substring_...(). The +return from pcre2_match() is one more than the highest numbered capturing +pair that has been set (for example, 1 if there are no captures), zero if the +vector of offsets is too small, or a negative error code for no match and other +errors. The function arguments are: +

+  code         Points to the compiled pattern
+  subject      Points to the subject string
+  length       Length of the subject string
+  startoffset  Offset in the subject at which to start matching
+  options      Option bits
+  match_data   Points to a match data block, for results
+  mcontext     Points to a match context, or is NULL
+
+A match context is needed only if you want to: +
+  Set up a callout function
+  Set a matching offset limit
+  Change the heap memory limit
+  Change the backtracking match limit
+  Change the backtracking depth limit
+  Set custom memory management specifically for the match
+
+The length and startoffset values are code units, not characters. +The length may be given as PCRE2_ZERO_TERMINATED for a subject that is +terminated by a binary zero code unit. The options are: +
+  PCRE2_ANCHORED          Match only at the first position
+  PCRE2_COPY_MATCHED_SUBJECT
+                          On success, make a private subject copy
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+                          Only useful in rare cases; use with care
+  PCRE2_ENDANCHORED       Pattern can match only at end of subject
+  PCRE2_NOTBOL            Subject string is not the beginning of a line
+  PCRE2_NOTEOL            Subject string is not the end of a line
+  PCRE2_NOTEMPTY          An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART  An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT            Do not use JIT matching
+  PCRE2_NO_UTF_CHECK      Do not check the subject for UTF validity (only relevant if PCRE2_UTF
+                           was set at compile time)
+  PCRE2_PARTIAL_HARD      Return PCRE2_ERROR_PARTIAL for a partial match even if there is a full match
+  PCRE2_PARTIAL_SOFT      Return PCRE2_ERROR_PARTIAL for a partial match if no full matches are found
+
+For details of partial matching, see the +pcre2partial +page. There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match_context_copy.html b/3rd/pcre2/doc/html/pcre2_match_context_copy.html new file mode 100644 index 00000000..4a719d69 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match_context_copy.html @@ -0,0 +1,41 @@ + + +pcre2_match_context_copy specification + + +

pcre2_match_context_copy man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function makes a new copy of a match context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match_context_create.html b/3rd/pcre2/doc/html/pcre2_match_context_create.html new file mode 100644 index 00000000..f7f27351 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match_context_create.html @@ -0,0 +1,42 @@ + + +pcre2_match_context_create specification + + +

pcre2_match_context_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates and initializes a new match context. If its argument is +NULL, malloc() is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match_context_free.html b/3rd/pcre2/doc/html/pcre2_match_context_free.html new file mode 100644 index 00000000..7f00ea9b --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match_context_free.html @@ -0,0 +1,41 @@ + + +pcre2_match_context_free specification + + +

pcre2_match_context_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_match_context_free(pcre2_match_context *mcontext); +

+
+DESCRIPTION +
+

+This function frees the memory occupied by a match context, using the memory +freeing function from the general context with which it was created, or +free() if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match_data_create.html b/3rd/pcre2/doc/html/pcre2_match_data_create.html new file mode 100644 index 00000000..c26c3b32 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match_data_create.html @@ -0,0 +1,50 @@ + + +pcre2_match_data_create specification + + +

pcre2_match_data_create man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates a new match data block, which is used for holding the +result of a match. The first argument specifies the number of pairs of offsets +that are required. These form the "output vector" (ovector) within the match +data block, and are used to identify the matched string and any captured +substrings when matching with pcre2_match(), or a number of different +matches at the same point when used with pcre2_dfa_match(). There is +always one pair of offsets; if ovecsize is zero, it is treated as one. +

+

+The second argument points to a general context, for custom memory management, +or is NULL for system memory management. The result of the function is NULL if +the memory for the block could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match_data_create_from_pattern.html b/3rd/pcre2/doc/html/pcre2_match_data_create_from_pattern.html new file mode 100644 index 00000000..db58ab91 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match_data_create_from_pattern.html @@ -0,0 +1,53 @@ + + +pcre2_match_data_create_from_pattern specification + + +

pcre2_match_data_create_from_pattern man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function creates a new match data block for holding the result of a match. +The first argument points to a compiled pattern. The number of capturing +parentheses within the pattern is used to compute the number of pairs of +offsets that are required in the match data block. These form the "output +vector" (ovector) within the match data block, and are used to identify the +matched string and any captured substrings when matching with +pcre2_match(). If you are using pcre2_dfa_match(), which uses the +output vector in a different way, you should use pcre2_match_data_create() +instead of this function. +

+

+The second argument points to a general context, for custom memory management, +or is NULL to use the same memory allocator as was used for the compiled +pattern. The result of the function is NULL if the memory for the block could +not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_match_data_free.html b/3rd/pcre2/doc/html/pcre2_match_data_free.html new file mode 100644 index 00000000..1c2520b9 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_match_data_free.html @@ -0,0 +1,48 @@ + + +pcre2_match_data_free specification + + +

pcre2_match_data_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_match_data_free(pcre2_match_data *match_data); +

+
+DESCRIPTION +
+

+If match_data is NULL, this function does nothing. Otherwise, +match_data must point to a match data block, which this function frees, +using the memory freeing function from the general context or compiled pattern +with which it was created, or free() if that was not set. If the match +data block was previously passed to pcre2_match(), it will have an +attached heapframe vector; this is also freed. +

+

+If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this +match data block, the copy of the subject that was referenced within the block +is also freed. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_pattern_convert.html b/3rd/pcre2/doc/html/pcre2_pattern_convert.html new file mode 100644 index 00000000..2fcd7cc0 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_pattern_convert.html @@ -0,0 +1,70 @@ + + +pcre2_pattern_convert specification + + +

pcre2_pattern_convert man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It converts a foreign pattern (for example, a glob) into a PCRE2 regular +expression pattern. Its arguments are: +

+  pattern     The foreign pattern
+  length      The length of the input pattern or PCRE2_ZERO_TERMINATED
+  options     Option bits
+  buffer      Pointer to pointer to output buffer, or NULL
+  blength     Pointer to output length field
+  cvcontext   Pointer to a convert context or NULL
+
+The length of the converted pattern (excluding the terminating zero) is +returned via blength. If buffer is NULL, the function just returns +the output length. If buffer points to a NULL pointer, heap memory is +obtained for the converted pattern, using the allocator in the context if +present (or else malloc()), and the field pointed to by buffer is +updated. If buffer points to a non-NULL field, that must point to a +buffer whose size is in the variable pointed to by blength. This value is +updated. +

+

+The option bits are: +

+  PCRE2_CONVERT_UTF                     Input is UTF
+  PCRE2_CONVERT_NO_UTF_CHECK            Do not check UTF validity
+  PCRE2_CONVERT_POSIX_BASIC             Convert POSIX basic pattern
+  PCRE2_CONVERT_POSIX_EXTENDED          Convert POSIX extended pattern
+  PCRE2_CONVERT_GLOB                    ) Convert
+  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR  )   various types
+  PCRE2_CONVERT_GLOB_NO_STARSTAR        )     of glob
+
+The return value from pcre2_pattern_convert() is zero on success or a +non-zero PCRE2 error code. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_pattern_info.html b/3rd/pcre2/doc/html/pcre2_pattern_info.html new file mode 100644 index 00000000..eaaac6c0 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_pattern_info.html @@ -0,0 +1,109 @@ + + +pcre2_pattern_info specification + + +

pcre2_pattern_info man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_pattern_info(const pcre2_code *code, uint32_t what, + void *where); +

+
+DESCRIPTION +
+

+This function returns information about a compiled pattern. Its arguments are: +

+  code     Pointer to a compiled regular expression pattern
+  what     What information is required
+  where    Where to put the information
+
+The recognized values for the what argument, and the information they +request are as follows: +
+  PCRE2_INFO_ALLOPTIONS      Final options after compiling
+  PCRE2_INFO_ARGOPTIONS      Options passed to pcre2_compile()
+  PCRE2_INFO_BACKREFMAX      Number of highest backreference
+  PCRE2_INFO_BSR             What \R matches:
+                               PCRE2_BSR_UNICODE: Unicode line endings
+                               PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only
+  PCRE2_INFO_CAPTURECOUNT    Number of capturing subpatterns
+  PCRE2_INFO_DEPTHLIMIT      Backtracking depth limit if set, otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_EXTRAOPTIONS    Extra options that were passed in the
+                               compile context
+  PCRE2_INFO_FIRSTBITMAP     Bitmap of first code units, or NULL
+  PCRE2_INFO_FIRSTCODETYPE   Type of start-of-match information
+                               0 nothing set
+                               1 first code unit is set
+                               2 start of string or after newline
+  PCRE2_INFO_FIRSTCODEUNIT   First code unit when type is 1
+  PCRE2_INFO_FRAMESIZE       Size of backtracking frame
+  PCRE2_INFO_HASBACKSLASHC   Return 1 if pattern contains \C
+  PCRE2_INFO_HASCRORLF       Return 1 if explicit CR or LF matches exist in the pattern
+  PCRE2_INFO_HEAPLIMIT       Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_JCHANGED        Return 1 if (?J) or (?-J) was used
+  PCRE2_INFO_JITSIZE         Size of JIT compiled code, or 0
+  PCRE2_INFO_LASTCODETYPE    Type of must-be-present information
+                               0 nothing set
+                               1 code unit is set
+  PCRE2_INFO_LASTCODEUNIT    Last code unit when type is 1
+  PCRE2_INFO_MATCHEMPTY      1 if the pattern can match an empty string, 0 otherwise
+  PCRE2_INFO_MATCHLIMIT      Match limit if set, otherwise PCRE2_ERROR_UNSET
+  PCRE2_INFO_MAXLOOKBEHIND   Length (in characters) of the longest lookbehind assertion
+  PCRE2_INFO_MINLENGTH       Lower bound length of matching strings
+  PCRE2_INFO_NAMECOUNT       Number of named subpatterns
+  PCRE2_INFO_NAMEENTRYSIZE   Size of name table entries
+  PCRE2_INFO_NAMETABLE       Pointer to name table
+  PCRE2_INFO_NEWLINE         Code for the newline sequence:
+                               PCRE2_NEWLINE_CR
+                               PCRE2_NEWLINE_LF
+                               PCRE2_NEWLINE_CRLF
+                               PCRE2_NEWLINE_ANY
+                               PCRE2_NEWLINE_ANYCRLF
+                               PCRE2_NEWLINE_NUL
+  PCRE2_INFO_RECURSIONLIMIT  Obsolete synonym for PCRE2_INFO_DEPTHLIMIT
+  PCRE2_INFO_SIZE            Size of compiled pattern
+
+If where is NULL, the function returns the amount of memory needed for +the requested information, in bytes. Otherwise, the where argument must +point to an unsigned 32-bit integer (uint32_t variable), except for the +following what values, when it must point to a variable of the type +shown: +
+  PCRE2_INFO_FIRSTBITMAP     const uint8_t *
+  PCRE2_INFO_JITSIZE         size_t
+  PCRE2_INFO_NAMETABLE       PCRE2_SPTR
+  PCRE2_INFO_SIZE            size_t
+
+The yield of the function is zero on success or: +
+  PCRE2_ERROR_NULL           the argument code is NULL
+  PCRE2_ERROR_BADMAGIC       the "magic number" was not found
+  PCRE2_ERROR_BADOPTION      the value of what is invalid
+  PCRE2_ERROR_BADMODE        the pattern was compiled in the wrong mode
+  PCRE2_ERROR_UNSET          the requested information is not set
+
+

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_serialize_decode.html b/3rd/pcre2/doc/html/pcre2_serialize_decode.html new file mode 100644 index 00000000..618ffa92 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_serialize_decode.html @@ -0,0 +1,65 @@ + + +pcre2_serialize_decode specification + + +

pcre2_serialize_decode man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function decodes a serialized set of compiled patterns back into a list of +individual patterns. This is possible only on a host that is running the same +version of PCRE2, with the same code unit width, and the host must also have +the same endianness, pointer width and PCRE2_SIZE type. The arguments for +pcre2_serialize_decode() are: +

+  codes            pointer to a vector in which to build the list
+  number_of_codes  number of slots in the vector
+  bytes            the serialized byte stream
+  gcontext         pointer to a general context or NULL
+
+The bytes argument must point to a block of data that was originally +created by pcre2_serialize_encode(), though it may have been saved on +disc or elsewhere in the meantime. If there are more codes in the serialized +data than slots in the list, only those compiled patterns that will fit are +decoded. The yield of the function is the number of decoded patterns, or one of +the following negative error codes: +
+  PCRE2_ERROR_BADDATA   number_of_codes is zero or less
+  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in bytes
+  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
+  PCRE2_ERROR_NULL      codes or bytes is NULL
+
+PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_serialize_encode.html b/3rd/pcre2/doc/html/pcre2_serialize_encode.html new file mode 100644 index 00000000..f1532700 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_serialize_encode.html @@ -0,0 +1,66 @@ + + +pcre2_serialize_encode specification + + +

pcre2_serialize_encode man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); +

+
+DESCRIPTION +
+

+This function encodes a list of compiled patterns into a byte stream that can +be saved on disc or elsewhere. Note that this is not an abstract format like +Java or .NET. Conversion of the byte stream back into usable compiled patterns +can only happen on a host that is running the same version of PCRE2, with the +same code unit width, and the host must also have the same endianness, pointer +width and PCRE2_SIZE type. The arguments for pcre2_serialize_encode() +are: +

+  codes             pointer to a vector containing the list
+  number_of_codes   number of slots in the vector
+  serialized_bytes  set to point to the serialized byte stream
+  serialized_size   set to the number of bytes in the byte stream
+  gcontext          pointer to a general context or NULL
+
+The context argument is used to obtain memory for the byte stream. When the +serialized data is no longer needed, it must be freed by calling +pcre2_serialize_free(). The yield of the function is the number of +serialized patterns, or one of the following negative error codes: +
+  PCRE2_ERROR_BADDATA      number_of_codes is zero or less
+  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
+  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
+  PCRE2_ERROR_NULL         an argument other than gcontext is NULL
+
+PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_serialize_free.html b/3rd/pcre2/doc/html/pcre2_serialize_free.html new file mode 100644 index 00000000..26b435bc --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_serialize_free.html @@ -0,0 +1,41 @@ + + +pcre2_serialize_free specification + + +

pcre2_serialize_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_serialize_free(uint8_t *bytes); +

+
+DESCRIPTION +
+

+This function frees the memory that was obtained by +pcre2_serialize_encode() to hold a serialized byte stream. The argument +must point to such a byte stream or be NULL, in which case the function returns +without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_serialize_get_number_of_codes.html b/3rd/pcre2/doc/html/pcre2_serialize_get_number_of_codes.html new file mode 100644 index 00000000..fdd24294 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_serialize_get_number_of_codes.html @@ -0,0 +1,49 @@ + + +pcre2_serialize_get_number_of_codes specification + + +

pcre2_serialize_get_number_of_codes man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); +

+
+DESCRIPTION +
+

+The bytes argument must point to a serialized byte stream that was +originally created by pcre2_serialize_encode() (though it may have been +saved on disc or elsewhere in the meantime). The function returns the number of +serialized patterns in the byte stream, or one of the following negative error +codes: +

+  PCRE2_ERROR_BADMAGIC  mismatch of id bytes in bytes
+  PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
+  PCRE2_ERROR_NULL      the argument is NULL
+
+PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the serialization functions in the +pcre2serialize +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_bsr.html b/3rd/pcre2/doc/html/pcre2_set_bsr.html new file mode 100644 index 00000000..8a62f18a --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_bsr.html @@ -0,0 +1,42 @@ + + +pcre2_set_bsr specification + + +

pcre2_set_bsr man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the convention for processing \R within a compile context. +The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. The +result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_callout.html b/3rd/pcre2/doc/html/pcre2_set_callout.html new file mode 100644 index 00000000..4e7aca6c --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_callout.html @@ -0,0 +1,43 @@ + + +pcre2_set_callout specification + + +

pcre2_set_callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function sets the callout fields in a match context (the first argument). +The second argument specifies a callout function, and the third argument is an +opaque data item that is passed to it. The result of this function is always +zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_character_tables.html b/3rd/pcre2/doc/html/pcre2_set_character_tables.html new file mode 100644 index 00000000..8564eea6 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_character_tables.html @@ -0,0 +1,45 @@ + + +pcre2_set_character_tables specification + + +

pcre2_set_character_tables man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); +

+
+DESCRIPTION +
+

+This function sets a pointer to custom character tables within a compile +context. The second argument must point to a set of PCRE2 character tables or +be NULL to request the default tables. The result is always zero. Character +tables can be created by calling pcre2_maketables() or by running the +pcre2_dftables maintenance command in binary mode (see the +pcre2build +documentation). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_compile_extra_options.html b/3rd/pcre2/doc/html/pcre2_set_compile_extra_options.html new file mode 100644 index 00000000..cb62022a --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_compile_extra_options.html @@ -0,0 +1,58 @@ + + +pcre2_set_compile_extra_options specification + + +

pcre2_set_compile_extra_options man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); +

+
+DESCRIPTION +
+

+This function sets additional option bits for pcre2_compile() that are +housed in a compile context. It completely replaces all the bits. The extra +options are: +

+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
+  PCRE2_EXTRA_ASCII_BSD                \d remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSS                \s remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSW                \w remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_DIGIT              [:digit:] and [:xdigit:] POSIX classes remain ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_POSIX              POSIX classes remain ASCII in UCP mode
+  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
+  PCRE2_EXTRA_CASELESS_RESTRICT        Disable mixed ASCII/non-ASCII case folding
+  PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
+  PCRE2_EXTRA_MATCH_LINE               Pattern matches whole lines
+  PCRE2_EXTRA_MATCH_WORD               Pattern matches "words"
+  PCRE2_EXTRA_NEVER_CALLOUT            Disallow callouts in pattern
+  PCRE2_EXTRA_NO_BS0                   Disallow \0 (but not \00 or \000)
+  PCRE2_EXTRA_PYTHON_OCTAL             Use Python rules for octal
+  PCRE2_EXTRA_TURKISH_CASING           Use Turkish I case folding
+
+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_compile_recursion_guard.html b/3rd/pcre2/doc/html/pcre2_set_compile_recursion_guard.html new file mode 100644 index 00000000..c09942ce --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_compile_recursion_guard.html @@ -0,0 +1,46 @@ + + +pcre2_set_compile_recursion_guard specification + + +

pcre2_set_compile_recursion_guard man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); +

+
+DESCRIPTION +
+

+This function defines, within a compile context, a function that is called +whenever pcre2_compile() starts to compile a parenthesized part of a +pattern. The first argument to the function gives the current depth of +parenthesis nesting, and the second is user data that is supplied when the +function is set up. The guard function should return zero if all is well, or +non-zero to force an error. This feature is provided so that applications can +check the available system stack space, in order to avoid running out. The +result of pcre2_set_compile_recursion_guard() is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_depth_limit.html b/3rd/pcre2/doc/html/pcre2_set_depth_limit.html new file mode 100644 index 00000000..a1cf7062 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_depth_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_depth_limit specification + + +

pcre2_set_depth_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the backtracking depth limit field in a match context. The +result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_glob_escape.html b/3rd/pcre2/doc/html/pcre2_set_glob_escape.html new file mode 100644 index 00000000..2b556271 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_glob_escape.html @@ -0,0 +1,43 @@ + + +pcre2_set_glob_escape specification + + +

pcre2_set_glob_escape man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It sets the escape character that is used when converting globs. The second +argument must either be zero (meaning there is no escape character) or a +punctuation character whose code point is less than 256. The default is grave +accent if running under Windows, otherwise backslash. The result of the +function is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_glob_separator.html b/3rd/pcre2/doc/html/pcre2_set_glob_separator.html new file mode 100644 index 00000000..283648ea --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_glob_separator.html @@ -0,0 +1,42 @@ + + +pcre2_set_glob_separator specification + + +

pcre2_set_glob_separator man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); +

+
+DESCRIPTION +
+

+This function is part of an experimental set of pattern conversion functions. +It sets the component separator character that is used when converting globs. +The second argument must be one of the characters forward slash, backslash, or +dot. The default is backslash when running under Windows, otherwise forward +slash. The result of the function is zero for success or PCRE2_ERROR_BADDATA if +the second argument is invalid. +

+

+The pattern conversion functions are described in the +pcre2convert +documentation. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_heap_limit.html b/3rd/pcre2/doc/html/pcre2_set_heap_limit.html new file mode 100644 index 00000000..3631ef6f --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_heap_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_heap_limit specification + + +

pcre2_set_heap_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the backtracking heap limit field in a match context. The +result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_match_limit.html b/3rd/pcre2/doc/html/pcre2_set_match_limit.html new file mode 100644 index 00000000..e840c744 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_match_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_match_limit specification + + +

pcre2_set_match_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the match limit field in a match context. The result is +always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_max_pattern_compiled_length.html b/3rd/pcre2/doc/html/pcre2_set_max_pattern_compiled_length.html new file mode 100644 index 00000000..a40f41e4 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_max_pattern_compiled_length.html @@ -0,0 +1,44 @@ + + +pcre2_set_max_pattern_compiled_length specification + + +

pcre2_set_max_pattern_compiled_length man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); +

+
+DESCRIPTION +
+

+This function sets, in a compile context, the maximum size (in bytes) for the +memory needed to hold the compiled version of a pattern that is using this +context. The result is always zero. If a pattern that is passed to +pcre2_compile() referencing this context needs more memory, an error is +generated. The default is the largest number that a PCRE2_SIZE variable can +hold, which is effectively unlimited. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_max_pattern_length.html b/3rd/pcre2/doc/html/pcre2_set_max_pattern_length.html new file mode 100644 index 00000000..f6e422aa --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_max_pattern_length.html @@ -0,0 +1,43 @@ + + +pcre2_set_max_pattern_length specification + + +

pcre2_set_max_pattern_length man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +

+
+DESCRIPTION +
+

+This function sets, in a compile context, the maximum text length (in code +units) of the pattern that can be compiled. The result is always zero. If a +longer pattern is passed to pcre2_compile() there is an immediate error +return. The default is effectively unlimited, being the largest value a +PCRE2_SIZE variable can hold. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_max_varlookbehind.html b/3rd/pcre2/doc/html/pcre2_set_max_varlookbehind.html new file mode 100644 index 00000000..1c03def2 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_max_varlookbehind.html @@ -0,0 +1,42 @@ + + +pcre2_set_max_varlookbehind specification + + +

pcre2_set_max_varlookbehind man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. The result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_newline.html b/3rd/pcre2/doc/html/pcre2_set_newline.html new file mode 100644 index 00000000..ba813001 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_newline.html @@ -0,0 +1,51 @@ + + +pcre2_set_newline specification + + +

pcre2_set_newline man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets the newline convention within a compile context. This +specifies which character(s) are recognized as newlines when compiling and +matching patterns. The second argument must be one of: +

+  PCRE2_NEWLINE_CR        Carriage return only
+  PCRE2_NEWLINE_LF        Linefeed only
+  PCRE2_NEWLINE_CRLF      CR followed by LF only
+  PCRE2_NEWLINE_ANYCRLF   Any of the above
+  PCRE2_NEWLINE_ANY       Any Unicode newline sequence
+  PCRE2_NEWLINE_NUL       The NUL character (binary zero)
+
+The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_offset_limit.html b/3rd/pcre2/doc/html/pcre2_set_offset_limit.html new file mode 100644 index 00000000..6d9a85c6 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_offset_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_offset_limit specification + + +

pcre2_set_offset_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +

+
+DESCRIPTION +
+

+This function sets the offset limit field in a match context. The result is +always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_optimize.html b/3rd/pcre2/doc/html/pcre2_set_optimize.html new file mode 100644 index 00000000..47caeb26 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_optimize.html @@ -0,0 +1,57 @@ + + +pcre2_set_optimize specification + + +

pcre2_set_optimize man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); +

+
+DESCRIPTION +
+

+This function controls which performance optimizations will be applied +by pcre2_compile(). It can be called multiple times with the same compile +context; the effects are cumulative, with the effects of later calls taking +precedence over earlier ones. +

+

+The result is zero for success, PCRE2_ERROR_NULL if ccontext is NULL, +or PCRE2_ERROR_BADOPTION if directive is unknown. The latter could be +useful to detect if a certain optimization is available. +

+

+The possible values for the directive parameter are: +

+  PCRE2_OPTIMIZATION_FULL   Enable all optimizations (default)
+  PCRE2_OPTIMIZATION_NONE   Disable all optimizations
+  PCRE2_AUTO_POSSESS        Enable auto-possessification
+  PCRE2_AUTO_POSSESS_OFF    Disable auto-possessification
+  PCRE2_DOTSTAR_ANCHOR      Enable implicit dotstar anchoring
+  PCRE2_DOTSTAR_ANCHOR_OFF  Disable implicit dotstar anchoring
+  PCRE2_START_OPTIMIZE      Enable start-up optimizations at match time
+  PCRE2_START_OPTIMIZE_OFF  Disable start-up optimizations at match time
+
+There is a complete description of the PCRE2 native API, including detailed +descriptions of the directive parameter values, in the +pcre2api +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_parens_nest_limit.html b/3rd/pcre2/doc/html/pcre2_set_parens_nest_limit.html new file mode 100644 index 00000000..95fd31c3 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_parens_nest_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_parens_nest_limit specification + + +

pcre2_set_parens_nest_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function sets, in a compile context, the maximum depth of nested +parentheses in a pattern. The result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_recursion_limit.html b/3rd/pcre2/doc/html/pcre2_set_recursion_limit.html new file mode 100644 index 00000000..9ff68c2f --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_recursion_limit.html @@ -0,0 +1,40 @@ + + +pcre2_set_recursion_limit specification + + +

pcre2_set_recursion_limit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_recursion_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
+DESCRIPTION +
+

+This function is obsolete and should not be used in new code. Use +pcre2_set_depth_limit() instead. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_recursion_memory_management.html b/3rd/pcre2/doc/html/pcre2_set_recursion_memory_management.html new file mode 100644 index 00000000..37af73ca --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_recursion_memory_management.html @@ -0,0 +1,42 @@ + + +pcre2_set_recursion_memory_management specification + + +

pcre2_set_recursion_memory_management man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_recursion_memory_management( + pcre2_match_context *mcontext, + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); +

+
+DESCRIPTION +
+

+From release 10.30 onwards, this function is obsolete and does nothing. The +result is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_substitute_callout.html b/3rd/pcre2/doc/html/pcre2_set_substitute_callout.html new file mode 100644 index 00000000..8640728f --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_substitute_callout.html @@ -0,0 +1,43 @@ + + +pcre2_set_substitute_callout specification + + +

pcre2_set_substitute_callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function sets the substitute callout fields in a match context (the first +argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_set_substitute_case_callout.html b/3rd/pcre2/doc/html/pcre2_set_substitute_case_callout.html new file mode 100644 index 00000000..ab506879 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_set_substitute_case_callout.html @@ -0,0 +1,45 @@ + + +pcre2_set_substitute_case_callout specification + + +

pcre2_set_substitute_case_callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +

+
+DESCRIPTION +
+

+This function sets the substitute case callout fields in a match context (the +first argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substitute.html b/3rd/pcre2/doc/html/pcre2_substitute.html new file mode 100644 index 00000000..abf0a703 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substitute.html @@ -0,0 +1,111 @@ + + +pcre2_substitute specification + + +

pcre2_substitute man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); +

+
+DESCRIPTION +
+

+This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It then makes a +copy of the subject, substituting a replacement string for what was matched. +Its arguments are: +

+  code          Points to the compiled pattern
+  subject       Points to the subject string
+  length        Length of the subject string
+  startoffset   Offset in the subject at which to start matching
+  options       Option bits
+  match_data    Points to a match data block, or is NULL
+  mcontext      Points to a match context, or is NULL
+  replacement   Points to the replacement string
+  rlength       Length of the replacement string
+  outputbuffer  Points to the output buffer
+  outlengthptr  Points to the length of the output buffer
+
+A match data block is needed only if you want to inspect the data from the +final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is +set. A match context is needed only if you want to: +
+  Set up a callout function
+  Set a matching offset limit
+  Change the backtracking match limit
+  Change the backtracking depth limit
+  Set custom memory management in the match context
+
+The length, startoffset and rlength values are code units, +not characters, as are the contents of the variable pointed at by +outlengthptr. This variable must contain the length of the output buffer +when the function is called. If the function is successful, the value is +changed to the length of the new string, excluding the trailing zero that is +automatically added. +

+

+The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for +zero-terminated strings. The options are: +

+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
+  PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
+
+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, +PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. +

+

+If PCRE2_SUBSTITUTE_MATCHED is set, match_data must be non-NULL; its +contents must be the result of a call to pcre2_match() using the same +pattern and subject. +

+

+The function returns the number of substitutions, which may be zero if there +are no matches. The result may be greater than one only when +PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code +is returned. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_copy_byname.html b/3rd/pcre2/doc/html/pcre2_substring_copy_byname.html new file mode 100644 index 00000000..fd01805e --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_copy_byname.html @@ -0,0 +1,58 @@ + + +pcre2_substring_copy_byname specification + + +

pcre2_substring_copy_byname man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring, identified +by name, into a given buffer. The arguments are: +

+  match_data    The match data block for the match
+  name          Name of the required substring
+  buffer        Buffer to receive the string
+  bufflen       Length of buffer (code units)
+
+The bufflen variable is updated to contain the length of the extracted +string, excluding the trailing zero. The yield of the function is zero for +success or one of the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that name
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      the buffer is not big enough
+
+If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_copy_bynumber.html b/3rd/pcre2/doc/html/pcre2_substring_copy_bynumber.html new file mode 100644 index 00000000..83e1a272 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_copy_bynumber.html @@ -0,0 +1,57 @@ + + +pcre2_substring_copy_bynumber specification + + +

pcre2_substring_copy_bynumber man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring into a given +buffer. The arguments are: +

+  match_data    The match data block for the match
+  number        Number of the required substring
+  buffer        Buffer to receive the string
+  bufflen       Length of buffer
+
+The bufflen variable is updated with the length of the extracted string, +excluding the terminating zero. The yield of the function is zero for success +or one of the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that number
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      the buffer is too small
+
+
+

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_free.html b/3rd/pcre2/doc/html/pcre2_substring_free.html new file mode 100644 index 00000000..e0d0fbda --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_free.html @@ -0,0 +1,41 @@ + + +pcre2_substring_free specification + + +

pcre2_substring_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_substring_free(PCRE2_UCHAR *buffer); +

+
+DESCRIPTION +
+

+This is a convenience function for freeing the memory obtained by a previous +call to pcre2_substring_get_byname() or +pcre2_substring_get_bynumber(). Its only argument is a pointer to the +string. If the argument is NULL, the function does nothing. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_get_byname.html b/3rd/pcre2/doc/html/pcre2_substring_get_byname.html new file mode 100644 index 00000000..a4b8771d --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_get_byname.html @@ -0,0 +1,60 @@ + + +pcre2_substring_get_byname specification + + +

pcre2_substring_get_byname man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring by name into +newly acquired memory. The arguments are: +

+  match_data    The match data for the match
+  name          Name of the required substring
+  bufferptr     Where to put the string pointer
+  bufflen       Where to put the string length
+
+The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function pcre2_substring_free() can be used to free it when +it is no longer needed. The yield of the function is zero for success or one of +the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that name
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      memory could not be obtained
+
+If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_get_bynumber.html b/3rd/pcre2/doc/html/pcre2_substring_get_bynumber.html new file mode 100644 index 00000000..391bc82b --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_get_bynumber.html @@ -0,0 +1,58 @@ + + +pcre2_substring_get_bynumber specification + + +

pcre2_substring_get_bynumber man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting a captured substring by number +into newly acquired memory. The arguments are: +

+  match_data    The match data for the match
+  number        Number of the required substring
+  bufferptr     Where to put the string pointer
+  bufflen       Where to put the string length
+
+The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function pcre2_substring_free() can be used to free it when +it is no longer needed. The yield of the function is zero for success or one of +the following error numbers: +
+  PCRE2_ERROR_NOSUBSTRING   there are no groups of that number
+  PCRE2_ERROR_UNAVAILABLE   the ovector was too small for that group
+  PCRE2_ERROR_UNSET         the group did not participate in the match
+  PCRE2_ERROR_NOMEMORY      memory could not be obtained
+
+
+

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_length_byname.html b/3rd/pcre2/doc/html/pcre2_substring_length_byname.html new file mode 100644 index 00000000..213bc949 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_length_byname.html @@ -0,0 +1,46 @@ + + +pcre2_substring_length_byname specification + + +

pcre2_substring_length_byname man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); +

+
+DESCRIPTION +
+

+This function returns the length of a matched substring, identified by name. +The arguments are: +

+  match_data   The match data block for the match
+  name         The substring name
+  length       Where to return the length
+
+The yield is zero on success, or an error code if the substring is not found. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_length_bynumber.html b/3rd/pcre2/doc/html/pcre2_substring_length_bynumber.html new file mode 100644 index 00000000..db01cca4 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_length_bynumber.html @@ -0,0 +1,48 @@ + + +pcre2_substring_length_bynumber specification + + +

pcre2_substring_length_bynumber man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); +

+
+DESCRIPTION +
+

+This function returns the length of a matched substring, identified by number. +The arguments are: +

+  match_data   The match data block for the match
+  number       The substring number
+  length       Where to return the length, or NULL
+
+The third argument may be NULL if all you want to know is whether or not a +substring is set. The yield is zero on success, or a negative error code +otherwise. After a partial match, only substring 0 is available. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_list_free.html b/3rd/pcre2/doc/html/pcre2_substring_list_free.html new file mode 100644 index 00000000..dea8bc58 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_list_free.html @@ -0,0 +1,41 @@ + + +pcre2_substring_list_free specification + + +

pcre2_substring_list_free man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+void pcre2_substring_list_free(PCRE2_UCHAR **list); +

+
+DESCRIPTION +
+

+This is a convenience function for freeing the store obtained by a previous +call to pcre2_substring_list_get(). Its only argument is a pointer to +the list of string pointers. If the argument is NULL, the function returns +immediately, without doing anything. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_list_get.html b/3rd/pcre2/doc/html/pcre2_substring_list_get.html new file mode 100644 index 00000000..fd436274 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_list_get.html @@ -0,0 +1,56 @@ + + +pcre2_substring_list_get specification + + +

pcre2_substring_list_get man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); +

+
+DESCRIPTION +
+

+This is a convenience function for extracting all the captured substrings after +a pattern match. It builds a list of pointers to the strings, and (optionally) +a second list that contains their lengths (in code units), excluding a +terminating zero that is added to each of them. All this is done in a single +block of memory that is obtained using the same memory allocation function that +was used to get the match data block. The convenience function +pcre2_substring_list_free() can be used to free it when it is no longer +needed. The arguments are: +

+  match_data    The match data block
+  listptr       Where to put a pointer to the list
+  lengthsptr    Where to put a pointer to the lengths, or NULL
+
+A pointer to a list of pointers is put in the variable whose address is in +listptr. The list is terminated by a NULL pointer. If lengthsptr is +not NULL, a matching list of lengths is created, and its address is placed in +lengthsptr. The yield of the function is zero on success or +PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained. +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_nametable_scan.html b/3rd/pcre2/doc/html/pcre2_substring_nametable_scan.html new file mode 100644 index 00000000..277affae --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_nametable_scan.html @@ -0,0 +1,53 @@ + + +pcre2_substring_nametable_scan specification + + +

pcre2_substring_nametable_scan man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); +

+
+DESCRIPTION +
+

+This convenience function finds, for a compiled pattern, the first and last +entries for a given name in the table that translates capture group names into +numbers. +

+  code    Compiled regular expression
+  name    Name whose entries are required
+  first   Where to return a pointer to the first entry
+  last    Where to return a pointer to the last entry
+
+When the name is found in the table, if first is NULL, the function +returns a group number, but if there is more than one matching entry, it is not +defined which one. Otherwise, when both pointers have been set, the yield of +the function is the length of each entry in code units. If the name is not +found, PCRE2_ERROR_NOSUBSTRING is returned. +

+

+There is a complete description of the PCRE2 native API, including the format of +the table entries, in the +pcre2api +page, and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2_substring_number_from_name.html b/3rd/pcre2/doc/html/pcre2_substring_number_from_name.html new file mode 100644 index 00000000..160fbda6 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2_substring_number_from_name.html @@ -0,0 +1,50 @@ + + +pcre2_substring_number_from_name specification + + +

pcre2_substring_number_from_name man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SYNOPSIS +
+

+#include <pcre2.h> +

+

+int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); +

+
+DESCRIPTION +
+

+This convenience function finds the number of a named substring capturing +parenthesis in a compiled pattern, provided that it is a unique name. The +function arguments are: +

+  code    Compiled regular expression
+  name    Name whose number is required
+
+The yield of the function is the number of the parenthesis if the name is +found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are +allowed (PCRE2_DUPNAMES is set), if the name is not unique, +PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers +with the same name by calling pcre2_substring_nametable_scan(). +

+

+There is a complete description of the PCRE2 native API in the +pcre2api +page and a description of the POSIX API in the +pcre2posix +page. +

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2api.html b/3rd/pcre2/doc/html/pcre2api.html new file mode 100644 index 00000000..079cf176 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2api.html @@ -0,0 +1,4496 @@ + + +pcre2api specification + + +

pcre2api man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+

+#include <pcre2.h> +
+
+PCRE2 is a new API for PCRE, starting at release 10.0. This document contains a +description of all its native functions. See the +pcre2 +document for an overview of all the PCRE2 documentation. +

+
PCRE2 NATIVE API BASIC FUNCTIONS
+

+pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); +
+
+void pcre2_code_free(pcre2_code *code); +
+
+pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); +
+
+pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); +
+
+int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +
+
+int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); +
+
+void pcre2_match_data_free(pcre2_match_data *match_data); +

+
PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS
+

+PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); +
+
+uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); +
+
+PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); +

+
PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS
+

+pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); +
+
+pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); +
+
+void pcre2_general_context_free(pcre2_general_context *gcontext); +

+
PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS
+

+pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); +
+
+void pcre2_compile_context_free(pcre2_compile_context *ccontext); +
+
+int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); +
+
+int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); +
+
+int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +
+
+int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); +
+
+int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); +
+
+int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); +
+
+int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); +

+
PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
+

+pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); +
+
+void pcre2_match_context_free(pcre2_match_context *mcontext); +
+
+int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); +
+
+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +
+
+int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +
+
+int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +
+
+int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); +

+
PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS
+

+int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); +
+
+void pcre2_substring_free(PCRE2_UCHAR *buffer); +
+
+int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); +
+
+int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); +
+
+int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); +
+
+int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); +
+
+void pcre2_substring_list_free(PCRE2_UCHAR **list); +
+
+int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); +

+
PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION
+

+int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); +

+
PCRE2 NATIVE API JIT FUNCTIONS
+

+int pcre2_jit_compile(pcre2_code *code, uint32_t options); +
+
+int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +
+
+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +
+
+pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); +
+
+void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); +
+
+void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); +

+
PCRE2 NATIVE API SERIALIZATION FUNCTIONS
+

+int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); +
+
+int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); +
+
+void pcre2_serialize_free(uint8_t *bytes); +
+
+int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); +

+
PCRE2 NATIVE API AUXILIARY FUNCTIONS
+

+pcre2_code *pcre2_code_copy(const pcre2_code *code); +
+
+pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +
+
+int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +
+
+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +
+
+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +
+
+int pcre2_pattern_info(const pcre2_code *code, uint32_t what, + void *where); +
+
+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +
+
+int pcre2_config(uint32_t what, void *where); +

+
PCRE2 NATIVE API OBSOLETE FUNCTIONS
+

+int pcre2_set_recursion_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+int pcre2_set_recursion_memory_management( + pcre2_match_context *mcontext, + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); +
+
+These functions became obsolete at release 10.30 and are retained only for +backward compatibility. They should not be used in new code. The first is +replaced by pcre2_set_depth_limit(); the second is no longer needed and +has no effect (it always returns zero). +

+
PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
+

+pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); +
+
+void pcre2_convert_context_free(pcre2_convert_context *cvcontext); +
+
+int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); +
+
+int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); +
+
+int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); +
+
+void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); +
+
+These functions provide a way of converting non-PCRE2 patterns into +patterns that can be processed by pcre2_compile(). This facility is +experimental and may be changed in future releases. At present, "globs" and +POSIX basic and extended patterns can be converted. Details are given in the +pcre2convert +documentation. +

+
PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES
+

+There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code +units, respectively. However, there is just one header file, pcre2.h. +This contains the function prototypes and other definitions for all three +libraries. One, two, or all three can be installed simultaneously. On Unix-like +systems the libraries are called libpcre2-8, libpcre2-16, and +libpcre2-32, and they can also co-exist with the original PCRE libraries. +Every PCRE2 function comes in three different forms, one for each library, for +example: +

+  pcre2_compile_8()
+  pcre2_compile_16()
+  pcre2_compile_32()
+
+There are also three different sets of data types: +
+  PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32
+  PCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32
+
+The UCHAR types define unsigned code units of the appropriate widths. +For example, PCRE2_UCHAR16 is usually defined as `uint16_t'. +The SPTR types are pointers to constants of the equivalent UCHAR types, +that is, they are pointers to vectors of unsigned code units. +

+

+Character strings are passed to a PCRE2 library as sequences of unsigned +integers in code units of the appropriate width. The length of a string may +be given as a number of code units, or the string may be specified as +zero-terminated. +

+

+Many applications use only one code unit width. For their convenience, macros +are defined whose names are the generic forms such as pcre2_compile() and +PCRE2_SPTR. These macros use the value of the macro PCRE2_CODE_UNIT_WIDTH to +generate the appropriate width-specific function and macro names. +PCRE2_CODE_UNIT_WIDTH is not defined by default. An application must define it +to be 8, 16, or 32 before including pcre2.h in order to make use of the +generic names. +

+

+Applications that use more than one code unit width can be linked with more +than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to be 0 before +including pcre2.h, and then use the real function names. Any code that is +to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is +unknown should also use the real function names. (Unfortunately, it is not +possible in C code to save and restore the value of a macro.) +

+

+If PCRE2_CODE_UNIT_WIDTH is not defined before including pcre2.h, a +compiler error occurs. +

+

+When using multiple libraries in an application, you must take care when +processing any particular pattern to use only functions from a single library. +For example, if you want to run a match using a pattern that was compiled with +pcre2_compile_16(), you must do so with pcre2_match_16(), not +pcre2_match_8() or pcre2_match_32(). +

+

+In the function summaries above, and in the rest of this document and other +PCRE2 documents, functions and data types are described using their generic +names, without the _8, _16, or _32 suffix. +

+
PCRE2 API OVERVIEW
+

+PCRE2 has its own native API, which is described in this document. There are +also some wrapper functions for the 8-bit library that correspond to the +POSIX regular expression API, but they do not give access to all the +functionality of PCRE2 and they are not thread-safe. They are described in the +pcre2posix +documentation. Both these APIs define a set of C function calls. +

+

+The native API C data types, function prototypes, option values, and error +codes are defined in the header file pcre2.h, which also contains +definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers +for the library. Applications can use these to include support for different +releases of PCRE2. +

+

+In a Windows environment, if you want to statically link an application program +against a non-dll PCRE2 library, you must define PCRE2_STATIC before including +pcre2.h. +

+

+The functions pcre2_compile() and pcre2_match() are used for +compiling and matching regular expressions in a Perl-compatible manner. A +sample program that demonstrates the simplest way of using them is provided in +the file called pcre2demo.c in the PCRE2 source distribution. A listing +of this program is given in the +pcre2demo +documentation, and the +pcre2sample +documentation describes how to compile and run it. +

+

+The compiling and matching functions recognize various options that are passed +as bits in an options argument. There are also some more complicated parameters +such as custom memory management functions and resource limits that are passed +in "contexts" (which are just memory blocks, described below). Simple +applications do not need to make use of contexts. +

+

+Just-in-time (JIT) compiler support is an optional feature of PCRE2 that can be +built in appropriate hardware environments. It greatly speeds up the matching +performance of many patterns. Programs can request that it be used if +available by calling pcre2_jit_compile() after a pattern has been +successfully compiled by pcre2_compile(). This does nothing if JIT +support is not available. +

+

+More complicated programs might need to make use of the specialist functions +pcre2_jit_stack_create(), pcre2_jit_stack_free(), and +pcre2_jit_stack_assign() in order to control the JIT code's memory usage. +

+

+JIT matching is automatically used by pcre2_match() if it is available, +unless the PCRE2_NO_JIT option is set. There is also a direct interface for JIT +matching, which gives improved performance at the expense of less sanity +checking. The JIT-specific functions are discussed in the +pcre2jit +documentation. +

+

+A second matching function, pcre2_dfa_match(), which is not +Perl-compatible, is also provided. This uses a different algorithm for the +matching. The alternative algorithm finds all possible matches (at a given +point in the subject), and scans the subject just once (unless there are +lookaround assertions). However, this algorithm does not return captured +substrings. A description of the two matching algorithms and their advantages +and disadvantages is given in the +pcre2matching +documentation. There is no JIT support for pcre2_dfa_match(). +

+

+In addition to the main compiling and matching functions, there are convenience +functions for extracting captured substrings from a subject string that has +been matched by pcre2_match(). They are: +

+  pcre2_substring_copy_byname()
+  pcre2_substring_copy_bynumber()
+  pcre2_substring_get_byname()
+  pcre2_substring_get_bynumber()
+  pcre2_substring_list_get()
+  pcre2_substring_length_byname()
+  pcre2_substring_length_bynumber()
+  pcre2_substring_nametable_scan()
+  pcre2_substring_number_from_name()
+
+pcre2_substring_free() and pcre2_substring_list_free() are also +provided, to free memory used for extracted strings. If either of these +functions is called with a NULL argument, the function returns immediately +without doing anything. +

+

+The function pcre2_substitute() can be called to match a pattern and +return a copy of the subject string with substitutions for parts that were +matched. +

+

+Functions whose names begin with pcre2_serialize_ are used for saving +compiled patterns on disc or elsewhere, and reloading them later. +

+

+Finally, there are functions for finding out information about a compiled +pattern (pcre2_pattern_info()) and about the configuration with which +PCRE2 was built (pcre2_config()). +

+

+Functions with names ending with _free() are used for freeing memory +blocks of various sorts. In all cases, if one of these functions is called with +a NULL argument, it does nothing. +

+
STRING LENGTHS AND OFFSETS
+

+The PCRE2 API uses string lengths and offsets into strings of code units in +several places. These values are always of type PCRE2_SIZE, which is an +unsigned integer type, currently always defined as size_t. The largest +value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved +as a special indicator for zero-terminated strings and unset offsets. +Therefore, the longest string that can be handled is one less than this +maximum. Note that string lengths are always given in code units. Only in the +8-bit library is such a length the same as the number of bytes in the string. +

+
NEWLINES
+

+PCRE2 supports five different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, or any +Unicode newline sequence. The Unicode newline sequences are the three just +mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +

+

+Each of the first three conventions is used by at least one operating system as +its standard newline sequence. When PCRE2 is built, a default can be specified. +If it is not, the default is set to LF, which is the Unix standard. However, +the newline convention can be changed by an application when calling +pcre2_compile(), or it can be specified by special text at the start of +the pattern itself; this overrides any other settings. See the +pcre2pattern +page for details of the special character sequences. +

+

+In the PCRE2 documentation the word "newline" is used to mean "the character or +pair of characters that indicate a line break". The choice of newline +convention affects the handling of the dot, circumflex, and dollar +metacharacters, the handling of #-comments in /x mode, and, when CRLF is a +recognized line ending sequence, the match position advancement for a +non-anchored pattern. There is more detail about this in the +section on pcre2_match() options +below. +

+

+The choice of newline convention does not affect the interpretation of +the \n or \r escape sequences, nor does it affect what \R matches; this has +its own separate convention. +

+
MULTITHREADING
+

+In a multithreaded application it is important to keep thread-specific data +separate from data that can be shared between threads. The PCRE2 library code +itself is thread-safe: it contains no static or global variables. The API is +designed to be fairly simple for non-threaded applications while at the same +time ensuring that multithreaded applications can use it. +

+

+There are several different blocks of data that are used to pass information +between the application and the PCRE2 libraries. +

+
+The compiled pattern +
+

+A pointer to the compiled form of a pattern is returned to the user when +pcre2_compile() is successful. The data in the compiled pattern is fixed, +and does not change when the pattern is matched. Therefore, it is thread-safe, +that is, the same compiled pattern can be used by more than one thread +simultaneously. For example, an application can compile all its patterns at the +start, before forking off multiple threads that use them. However, if the +just-in-time (JIT) optimization feature is being used, it needs separate memory +stack areas for each thread. See the +pcre2jit +documentation for more details. +

+

+In a more complicated situation, where patterns are compiled only when they are +first needed, but are still shared between threads, pointers to compiled +patterns must be protected from simultaneous writing by multiple threads. This +is somewhat tricky to do correctly. If you know that writing to a pointer is +atomic in your environment, you can use logic like this: +

+  Get a read-only (shared) lock (mutex) for pointer
+  if (pointer == NULL)
+    {
+    Get a write (unique) lock for pointer
+    if (pointer == NULL) pointer = pcre2_compile(...
+    }
+  Release the lock
+  Use pointer in pcre2_match()
+
+Of course, testing for compilation errors should also be included in the code. +

+

+The reason for checking the pointer a second time is as follows: Several +threads may have acquired the shared lock and tested the pointer for being +NULL, but only one of them will be given the write lock, with the rest kept +waiting. The winning thread will compile the pattern and store the result. +After this thread releases the write lock, another thread will get it, and if +it does not retest pointer for being NULL, will recompile the pattern and +overwrite the pointer, creating a memory leak and possibly causing other +issues. +

+

+In an environment where writing to a pointer may not be atomic, the above logic +is not sufficient. The thread that is doing the compiling may be descheduled +after writing only part of the pointer, which could cause other threads to use +an invalid value. Instead of checking the pointer itself, a separate "pointer +is valid" flag (that can be updated atomically) must be used: +

+  Get a read-only (shared) lock (mutex) for pointer
+  if (!pointer_is_valid)
+    {
+    Get a write (unique) lock for pointer
+    if (!pointer_is_valid)
+      {
+      pointer = pcre2_compile(...
+      pointer_is_valid = TRUE
+      }
+    }
+  Release the lock
+  Use pointer in pcre2_match()
+
+If JIT is being used, but the JIT compilation is not being done immediately +(perhaps waiting to see if the pattern is used often enough), similar logic is +required. JIT compilation updates a value within the compiled code block, so a +thread must gain unique write access to the pointer before calling +pcre2_jit_compile(). Alternatively, pcre2_code_copy() or +pcre2_code_copy_with_tables() can be used to obtain a private copy of the +compiled code before calling the JIT compiler. +

+
+Context blocks +
+

+The next main section below introduces the idea of "contexts" in which PCRE2 +functions are called. A context is nothing more than a collection of parameters +that control the way PCRE2 operates. Grouping a number of parameters together +in a context is a convenient way of passing them to a PCRE2 function without +using lots of arguments. The parameters that are stored in contexts are in some +sense "advanced features" of the API. Many straightforward applications will +not need to use contexts. +

+

+In a multithreaded application, if the parameters in a context are values that +are never changed, the same context can be used by all the threads. However, if +any thread needs to change any value in a context, it must make its own +thread-specific copy. +

+
+Match blocks +
+

+The matching functions need a block of memory for storing the results of a +match. This includes details of what was matched, as well as additional +information such as the name of a (*MARK) setting. Each thread must provide its +own copy of this memory. +

+
PCRE2 CONTEXTS
+

+Some PCRE2 functions have a lot of parameters, many of which are used only by +specialist applications, for example, those that use custom memory management +or non-standard character tables. To keep function argument lists at a +reasonable size, and at the same time to keep the API extensible, "uncommon" +parameters are passed to certain functions in a context instead of +directly. A context is just a block of memory that holds the parameter values. +Applications that do not need to adjust any of the context parameters can pass +NULL when a context pointer is required. +

+

+There are three different types of context: a general context that is relevant +for several PCRE2 operations, a compile-time context, and a match-time context. +

+
+The general context +
+

+At present, this context just contains pointers to (and data for) external +memory management functions that are called from several places in the PCRE2 +library. The context is named `general' rather than specifically `memory' +because in future other fields may be added. If you do not want to supply your +own custom memory management functions, you do not need to bother with a +general context. A general context is created by: +
+
+pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); +
+
+The two function pointers specify custom memory management functions, whose +prototypes are: +

+  void *private_malloc(PCRE2_SIZE, void *);
+  void  private_free(void *, void *);
+
+Whenever code in PCRE2 calls these functions, the final argument is the value +of memory_data. Either of the first two arguments of the creation +function may be NULL, in which case the system memory management functions +malloc() and free() are used. (This is not currently useful, as +there are no other fields in a general context, but in future there might be.) +The private_malloc() function is used (if supplied) to obtain memory for +storing the context, and all three values are saved as part of the context. +

+

+Whenever PCRE2 creates a data block of any kind, the block contains a pointer +to the free() function that matches the malloc() function that was +used. When the time comes to free the block, this function is called. +

+

+A general context can be copied by calling: +
+
+pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); +
+
+The memory used for a general context should be freed by calling: +
+
+void pcre2_general_context_free(pcre2_general_context *gcontext); +
+
+If this function is passed a NULL argument, it returns immediately without +doing anything. +

+
+The compile context +
+

+A compile context is required if you want to provide an external function for +stack checking during compilation or to change the default values of any of the +following compile-time parameters: +

+  What \R matches (Unicode newlines or CR, LF, CRLF only)
+  PCRE2's character tables
+  The newline character sequence
+  The compile time nested parentheses limit
+  The maximum length of the pattern string
+  The extra options bits (none set by default)
+  Which performance optimizations the compiler should apply
+
+A compile context is also required if you are using custom memory management. +If none of these apply, just pass NULL as the context argument of +pcre2_compile(). +

+

+A compile context is created, copied, and freed by the following functions: +
+
+pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); +
+
+void pcre2_compile_context_free(pcre2_compile_context *ccontext); +
+
+A compile context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. +
+
+int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); +
+
+The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, +or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line +ending sequence. The value is used by the JIT compiler and by the two +interpreted matching functions, pcre2_match() and +pcre2_dfa_match(). +
+
+int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); +
+
+The value must be the result of a call to pcre2_maketables(), whose only +argument is a general context. This function builds a set of character tables +in the current locale. +
+
+int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); +
+
+As PCRE2 has developed, almost all the 32 option bits that are available in +the options argument of pcre2_compile() have been used up. To avoid +running out, the compile context contains a set of extra option bits which are +used for some newer, assumed rarer, options. This function sets those bits. It +always sets all the bits (either on or off); that is, the supplied value +replaces any existing setting rather than being combined with it. The available +options are defined in the section entitled "Extra +compile options" +below. +
+
+int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); +
+
+This sets a maximum length, in code units, for any pattern string that is +compiled with this context. If the pattern is longer, an error is generated. +This facility is provided so that applications that accept patterns from +external sources can limit their size. The default is the largest number that a +PCRE2_SIZE variable can hold, which is effectively unlimited. +
+
+int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); +
+
+This sets a maximum size, in bytes, for the memory needed to hold the compiled +version of a pattern that is compiled with this context. If the pattern needs +more memory, an error is generated. This facility is provided so that +applications that accept patterns from external sources can limit the amount of +memory they use. The default is the largest number that a PCRE2_SIZE variable +can hold, which is effectively unlimited. +
+
+int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); +
+
+This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. +
+
+int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); +
+
+This specifies which characters or character sequences are to be recognized as +newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), +PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character +sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), +PCRE2_NEWLINE_ANY (any Unicode newline sequence), or PCRE2_NEWLINE_NUL (the +NUL character, that is a binary zero). +

+

+A pattern can override the value set in the compile context by starting with a +sequence such as (*CRLF). See the +pcre2pattern +page for details. +

+

+When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE +option, the newline convention affects the recognition of the end of internal +comments starting with #. The value is saved with the compiled pattern for +subsequent use by the JIT compiler and by the two interpreted matching +functions, pcre2_match() and pcre2_dfa_match(). +
+
+int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); +
+
+This parameter adjusts the limit, set when PCRE2 is built (default 250), on the +depth of parenthesis nesting in a pattern. This limit stops rogue patterns +using up too much system stack when being compiled. The limit applies to +parentheses of all kinds, not just capturing parentheses. +
+
+int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); +
+
+There is at least one application that runs PCRE2 in threads with very limited +system stack, where running out of stack is to be avoided at all costs. The +parenthesis limit above cannot take account of how much stack is actually +available during compilation. For a finer control, you can supply a function +that is called whenever pcre2_compile() starts to compile a parenthesized +part of a pattern. This function can check the actual stack size (or anything +else that it wants to, of course). +

+

+The first argument to the callout function gives the current depth of +nesting, and the second is user data that is set up by the last argument of +pcre2_set_compile_recursion_guard(). The callout function should return +zero if all is well, or non-zero to force an error. +
+
+int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); +
+
+PCRE2 can apply various performance optimizations during compilation, in order +to make matching faster. For example, the compiler might convert some regex +constructs into an equivalent construct which pcre2_match() can execute +faster. By default, all available optimizations are enabled. However, in rare +cases, one might wish to disable specific optimizations. For example, if it is +known that some optimizations cannot benefit a certain regex, it might be +desirable to disable them, in order to speed up compilation. +

+

+The permitted values of directive are as follows: +

+  PCRE2_OPTIMIZATION_FULL
+
+Enable all optional performance optimizations. This is the default value. +
+  PCRE2_OPTIMIZATION_NONE
+
+Disable all optional performance optimizations. +
+  PCRE2_AUTO_POSSESS
+  PCRE2_AUTO_POSSESS_OFF
+
+Enable/disable "auto-possessification" of variable quantifiers such as * and +. +This optimization, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +disable this optimization if you want the matching functions to do a full, +unoptimized search and run all the callouts. +
+  PCRE2_DOTSTAR_ANCHOR
+  PCRE2_DOTSTAR_ANCHOR_OFF
+
+Enable/disable an optimization that is applied when .* is the first significant +item in a top-level branch of a pattern, and all the other branches also start +with .* or with \A or \G or ^. Such a pattern is automatically anchored if +PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set for any +^ items. Otherwise, the fact that any match must start either at the start of +the subject or following a newline is remembered. Like other optimizations, +this can cause callouts to be skipped. +

+

+Dotstar anchor optimization is automatically disabled for .* if it is inside an +atomic group or a capture group that is the subject of a backreference, or if +the pattern contains (*PRUNE) or (*SKIP). +

+  PCRE2_START_OPTIMIZE
+  PCRE2_START_OPTIMIZE_OFF
+
+Enable/disable optimizations which cause matching functions to scan the subject +string for specific code unit values before attempting a match. For example, if +it is known that an unanchored match must start with a specific value, the +matching code searches the subject for that value, and fails immediately if it +cannot find it, without actually running the main matching function. This means +that a special item such as (*COMMIT) at the start of a pattern is not +considered until after a suitable starting point for the match has been found. +Also, when callouts or (*MARK) items are in use, these "start-up" optimizations +can cause them to be skipped if the pattern is never actually used. The start-up +optimizations are in effect a pre-scan of the subject that takes place before +the pattern is run. +

+

+Disabling start-up optimizations ensures that in cases where the result is "no +match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are +considered at every possible starting position in the subject string. +

+

+Disabling start-up optimizations may change the outcome of a matching operation. +Consider the pattern +

+  (*COMMIT)ABC
+
+When this is compiled, PCRE2 records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run without start-up optimizations, the initial scan along the subject +string does not happen. The first match attempt is run starting from "D" and +when this fails, (*COMMIT) prevents any further matches being tried, so the +overall result is "no match". +

+

+Another start-up optimization makes use of a minimum length for a matching +subject, which is recorded when possible. Consider the pattern +

+  (*MARK:1)B(*MARK:2)(X|Y)
+
+The minimum length for a match is two characters. If the subject is "XXBB", the +"starting character" optimization skips "XX", then tries to match "BB", which +is long enough. In the process, (*MARK:2) is encountered and remembered. When +the match attempt fails, the next "B" is found, but there is only one character +left, so there are no more attempts, and "no match" is returned with the "last +mark seen" set to "2". Without start-up optimizations, however, matches are +tried at every possible starting position, including at the end of the subject, +where (*MARK:1) is encountered, but there is no "B", so the "last mark seen" +that is returned is "1". In this case, the optimizations do not affect the +overall match result, which is still "no match", but they do affect the +auxiliary information that is returned. +

+
+The match context +
+

+A match context is required if you want to: +

+  Set up a callout function
+  Set an offset limit for matching an unanchored pattern
+  Change the limit on the amount of heap used when matching
+  Change the backtracking match limit
+  Change the backtracking depth limit
+  Set custom memory management specifically for the match
+
+If none of these apply, just pass NULL as the context argument of +pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match(). +

+

+A match context is created, copied, and freed by the following functions: +
+
+pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); +
+
+void pcre2_match_context_free(pcre2_match_context *mcontext); +
+
+A match context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. +
+
+int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); +
+
+This sets up a callout function for PCRE2 to call at specified points +during a matching operation. Details are given in the +pcre2callout +documentation. +
+
+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +
+
+This sets up a callout function for PCRE2 to call after each substitution +made by pcre2_substitute(). Details are given in the section entitled +"Creating a new string with substitutions" +below. +
+
+int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +
+
+This sets up a callout function for PCRE2 to call when performing case +transformations inside pcre2_substitute(). Details are given in the +section entitled "Creating a new string with substitutions" +below. +
+
+int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); +
+
+The offset_limit parameter limits how far an unanchored search can +advance in the subject string. The default value is PCRE2_UNSET. The +pcre2_match() and pcre2_dfa_match() functions return +PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given +offset is not found. The pcre2_substitute() function makes no more +substitutions. +

+

+For example, if the pattern /abc/ is matched against "123abc" with an offset +limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match can never be +found if the startoffset argument of pcre2_match(), +pcre2_dfa_match(), or pcre2_substitute() is greater than the offset +limit set in the match context. +

+

+When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT option when +calling pcre2_compile() so that when JIT is in use, different code can be +compiled. If a match is started with a non-default offset limit when +PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. +

+

+The offset limit facility can be used to track progress when searching large +subject strings or to limit the extent of global substitutions. See also the +PCRE2_FIRSTLINE option, which requires a match to start before or at the first +newline that follows the start of matching in the subject. If this is set with +an offset limit, a match must occur in the first line and also within the +offset limit. In other words, whichever limit comes first is used. +
+
+int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+The heap_limit parameter specifies, in units of kibibytes (1024 bytes), +the maximum amount of heap memory that pcre2_match() may use to hold +backtracking information when running an interpretive match. This limit also +applies to pcre2_dfa_match(), which may use the heap when processing +patterns with a lot of nested pattern recursion or lookarounds or atomic +groups. This limit does not apply to matching with the JIT optimization, which +has its own memory control arrangements (see the +pcre2jit +documentation for more details). If the limit is reached, the negative error +code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 +is built; if it is not, the default is set very large and is essentially +unlimited. +

+

+A value for the heap limit may also be supplied by an item at the start of a +pattern of the form +

+  (*LIMIT_HEAP=ddd)
+
+where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of pcre2_match() or, if no such +limit is set, less than the default. +

+

+The pcre2_match() function always needs some heap memory, so setting a +value of zero guarantees a "heap limit exceeded" error. Details of how +pcre2_match() uses the heap are given in the +pcre2perform +documentation. +

+

+For pcre2_dfa_match(), a vector on the system stack is used when +processing pattern recursions, lookarounds, or atomic groups, and only if this +is not big enough is heap memory used. In this case, setting a value of zero +disables the use of the heap. +
+
+int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+The match_limit parameter provides a means of preventing PCRE2 from using +up too many computing resources when processing patterns that are not going to +match, but which have a very large number of possibilities in their search +trees. The classic example is a pattern that uses nested unlimited repeats. +

+

+There is an internal counter in pcre2_match() that is incremented each +time round its main matching loop. If this value reaches the match limit, +pcre2_match() returns the negative value PCRE2_ERROR_MATCHLIMIT. This has +the effect of limiting the amount of backtracking that can take place. For +patterns that are not anchored, the count restarts from zero for each position +in the subject string. This limit also applies to pcre2_dfa_match(), +though the counting is done in a different way. +

+

+When pcre2_match() is called with a pattern that was successfully +processed by pcre2_jit_compile(), the way in which matching is executed +is entirely different. However, there is still the possibility of runaway +matching that goes on for a very long time, and so the match_limit value +is also used in this case (but in a different way) to limit how long the +matching can continue. +

+

+The default value for the limit can be set when PCRE2 is built; the default is +10 million, which handles all but the most extreme cases. A value for the match +limit may also be supplied by an item at the start of a pattern of the form +

+  (*LIMIT_MATCH=ddd)
+
+where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of pcre2_match() or +pcre2_dfa_match() or, if no such limit is set, less than the default. +
+
+int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); +
+
+This parameter limits the depth of nested backtracking in pcre2_match(). +Each time a nested backtracking point is passed, a new memory frame is used +to remember the state of matching at that point. Thus, this parameter +indirectly limits the amount of memory that is used in a match. However, +because the size of each memory frame depends on the number of capturing +parentheses, the actual memory limit varies from pattern to pattern. This limit +was more useful in versions before 10.30, where function recursion was used for +backtracking. +

+

+The depth limit is not relevant, and is ignored, when matching is done using +JIT compiled code. However, it is supported by pcre2_dfa_match(), which +uses it to limit the depth of nested internal recursive function calls that +implement atomic groups, lookaround assertions, and pattern recursions. This +limits, indirectly, the amount of system stack that is used. It was more useful +in versions before 10.32, when stack memory was used for local workspace +vectors for recursive function calls. From version 10.32, only local variables +are allocated on the stack and as each call uses only a few hundred bytes, even +a small stack can support quite a lot of recursion. +

+

+If the depth of internal recursive function calls is great enough, local +workspace vectors are allocated on the heap from version 10.32 onwards, so the +depth limit also indirectly limits the amount of heap memory that is used. A +recursive pattern such as /(.(?2))((?1)|)/, when matched to a very long string +using pcre2_dfa_match(), can use a great deal of memory. However, it is +probably better to limit heap usage directly by calling +pcre2_set_heap_limit(). +

+

+The default value for the depth limit can be set when PCRE2 is built; if it is +not, the default is set to the same value as the default for the match limit. +If the limit is exceeded, pcre2_match() or pcre2_dfa_match() +returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be +supplied by an item at the start of a pattern of the form +

+  (*LIMIT_DEPTH=ddd)
+
+where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of pcre2_match() or +pcre2_dfa_match() or, if no such limit is set, less than the default. +

+
CHECKING BUILD-TIME OPTIONS
+

+int pcre2_config(uint32_t what, void *where); +

+

+The function pcre2_config() makes it possible for a PCRE2 client to find +the value of certain configuration parameters and to discover which optional +features have been compiled into the PCRE2 library. The +pcre2build +documentation has more details about these features. +

+

+The first argument for pcre2_config() specifies which information is +required. The second argument is a pointer to memory into which the information +is placed. If NULL is passed, the function returns the amount of memory that is +needed for the requested information. For calls that return numerical values, +the value is in bytes; when requesting these values, where should point +to appropriately aligned memory. For calls that return strings, the required +length is given in code units, not counting the terminating zero. +

+

+When requesting information, the returned value from pcre2_config() is +non-negative on success, or the negative error code PCRE2_ERROR_BADOPTION if +the value in the first argument is not recognized. The following information is +available: +

+  PCRE2_CONFIG_BSR
+
+The output is a uint32_t integer whose value indicates what character +sequences the \R escape sequence matches by default. A value of +PCRE2_BSR_UNICODE means that \R matches any Unicode line ending sequence; a +value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The +default can be overridden when a pattern is compiled. +
+  PCRE2_CONFIG_COMPILED_WIDTHS
+
+The output is a uint32_t integer whose lower bits indicate which code unit +widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support, +and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively. +
+  PCRE2_CONFIG_DEPTHLIMIT
+
+The output is a uint32_t integer that gives the default limit for the depth of +nested backtracking in pcre2_match() or the depth of nested recursions, +lookarounds, and atomic groups in pcre2_dfa_match(). Further details are +given with pcre2_set_depth_limit() above. +
+  PCRE2_CONFIG_HEAPLIMIT
+
+The output is a uint32_t integer that gives, in kibibytes, the default limit +for the amount of heap memory used by pcre2_match() or +pcre2_dfa_match(). Further details are given with +pcre2_set_heap_limit() above. +
+  PCRE2_CONFIG_JIT
+
+The output is a uint32_t integer that is set to one if support for just-in-time +compiling is included in the library; otherwise it is set to zero. Note that +having the support in the library does not guarantee that JIT will be used for +any given match, and neither does it guarantee that JIT will actually be able +to function, because it may not be able to allocate executable memory in some +environments. There is a special call to pcre2_jit_compile() that can be +used to check this. See the +pcre2jit +documentation for more details. +
+  PCRE2_CONFIG_JITTARGET
+
+The where argument should point to a buffer that is at least 48 code +units long. (The exact length required can be found by calling +pcre2_config() with where set to NULL.) The buffer is filled with a +string that contains the name of the architecture for which the JIT compiler is +configured, for example "x86 32bit (little endian + unaligned)". If JIT support +is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the number of +code units used is returned. This is the length of the string, plus one unit +for the terminating zero. +
+  PCRE2_CONFIG_LINKSIZE
+
+The output is a uint32_t integer that contains the number of bytes used for +internal linkage in compiled regular expressions. When PCRE2 is configured, the +value can be set to 2, 3, or 4, with the default being 2. This is the value +that is returned by pcre2_config(). However, when the 16-bit library is +compiled, a value of 3 is rounded up to 4, and when the 32-bit library is +compiled, internal linkages always use 4 bytes, so the configured value is not +relevant. +

+

+The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all +but the most massive patterns, since it allows the size of the compiled pattern +to be up to 65535 code units. Larger values allow larger regular expressions to +be compiled by those two libraries, but at the expense of slower matching. +

+  PCRE2_CONFIG_MATCHLIMIT
+
+The output is a uint32_t integer that gives the default match limit for +pcre2_match(). Further details are given with +pcre2_set_match_limit() above. +
+  PCRE2_CONFIG_NEWLINE
+
+The output is a uint32_t integer whose value specifies the default character +sequence that is recognized as meaning "newline". The values are: +
+  PCRE2_NEWLINE_CR       Carriage return (CR)
+  PCRE2_NEWLINE_LF       Linefeed (LF)
+  PCRE2_NEWLINE_CRLF     Carriage return, linefeed (CRLF)
+  PCRE2_NEWLINE_ANY      Any Unicode line ending
+  PCRE2_NEWLINE_ANYCRLF  Any of CR, LF, or CRLF
+  PCRE2_NEWLINE_NUL      The NUL character (binary zero)
+
+The default should normally correspond to the standard sequence for your +operating system. +
+  PCRE2_CONFIG_NEVER_BACKSLASH_C
+
+The output is a uint32_t integer that is set to one if the use of \C was +permanently disabled when PCRE2 was built; otherwise it is set to zero. +
+  PCRE2_CONFIG_PARENSLIMIT
+
+The output is a uint32_t integer that gives the maximum depth of nesting +of parentheses (of any kind) in a pattern. This limit is imposed to cap the +amount of system stack used when a pattern is compiled. It is specified when +PCRE2 is built; the default is 250. This limit does not take into account the +stack that may already be used by the calling application. For finer control +over compilation stack usage, see pcre2_set_compile_recursion_guard(). +
+  PCRE2_CONFIG_STACKRECURSE
+
+This parameter is obsolete and should not be used in new code. The output is a +uint32_t integer that is always set to zero. +
+  PCRE2_CONFIG_TABLES_LENGTH
+
+The output is a uint32_t integer that gives the length of PCRE2's character +processing tables in bytes. For details of these tables see the +section on locale support +below. +
+  PCRE2_CONFIG_UNICODE_VERSION
+
+The where argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +pcre2_config() with where set to NULL.) If PCRE2 has been compiled +without Unicode support, the buffer is filled with the text "Unicode not +supported". Otherwise, the Unicode version string (for example, "8.0.0") is +inserted. The number of code units used is returned. This is the length of the +string plus one unit for the terminating zero. +
+  PCRE2_CONFIG_UNICODE
+
+The output is a uint32_t integer that is set to one if Unicode support is +available; otherwise it is set to zero. Unicode support implies UTF support. +
+  PCRE2_CONFIG_VERSION
+
+The where argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +pcre2_config() with where set to NULL.) The buffer is filled with +the PCRE2 version string, zero-terminated. The number of code units used is +returned. This is the length of the string plus one unit for the terminating +zero. +

+
COMPILING A PATTERN
+

+pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); +
+
+void pcre2_code_free(pcre2_code *code); +
+
+pcre2_code *pcre2_code_copy(const pcre2_code *code); +
+
+pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); +

+

+The pcre2_compile() function compiles a pattern into an internal form. +The pattern is defined by a pointer to a string of code units and a length in +code units. If the pattern is zero-terminated, the length can be specified as +PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a length of zero is treated +as an empty string (NULL with a non-zero length causes an error return). The +function returns a pointer to a block of memory that contains the compiled +pattern and related data, or NULL if an error occurred. +

+

+If the compile context argument ccontext is NULL, memory for the compiled +pattern is obtained by calling malloc(). Otherwise, it is obtained from +the same memory function that was used for the compile context. The caller must +free the memory by calling pcre2_code_free() when it is no longer needed. +If pcre2_code_free() is called with a NULL argument, it returns +immediately, without doing anything. +

+

+The function pcre2_code_copy() makes a copy of the compiled code in new +memory, using the same memory allocator as was used for the original. However, +if the code has been processed by the JIT compiler (see +below), +the JIT information cannot be copied (because it is position-dependent). +The new copy can initially be used only for non-JIT matching, though it can be +passed to pcre2_jit_compile() if required. If pcre2_code_copy() is +called with a NULL argument, it returns NULL. +

+

+The pcre2_code_copy() function provides a way for individual threads in a +multithreaded application to acquire a private copy of shared compiled code. +However, it does not make a copy of the character tables used by the compiled +pattern; the new pattern code points to the same tables as the original code. +(See +"Locale Support" +below for details of these character tables.) In many applications the same +tables are used throughout, so this behaviour is appropriate. Nevertheless, +there are occasions when a copy of a compiled pattern and the relevant tables +are needed. The pcre2_code_copy_with_tables() function provides this facility. +Copies of both the code and the tables are made, with the new code pointing to +the new tables. The memory for the new tables is automatically freed when +pcre2_code_free() is called for the new copy of the compiled code. If +pcre2_code_copy_with_tables() is called with a NULL argument, it returns +NULL. +

+

+NOTE: When one of the matching functions is called, pointers to the compiled +pattern and the subject string are set in the match data block so that they can +be referenced by the substring extraction functions after a successful match. +After running a match, you must not free a compiled pattern or a subject string +until after all operations on the +match data block +have taken place, unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for pcre2_match()" +below. +

+

+The options argument for pcre2_compile() contains various bit +settings that affect the compilation. It should be zero if none of them are +required. The available options are described below. Some of them (in +particular, those that are compatible with Perl, but some others as well) can +also be set and unset from within the pattern (see the detailed description in +the +pcre2pattern +documentation). +

+

+For those options that can be different in different parts of the pattern, the +contents of the options argument specify their settings at the start of +compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK +options can be set at the time of matching as well as at compile time. +

+

+Some additional options and less frequently required compile-time parameters +(for example, the newline setting) can be provided in a compile context (as +described +above). +

+

+If errorcode or erroroffset is NULL, pcre2_compile() returns +NULL immediately. Otherwise, the variables to which these point are set to an +error code and an offset (number of code units) within the pattern, +respectively, when pcre2_compile() returns NULL because a compilation +error has occurred. +

+

+There are over 100 positive error codes that pcre2_compile() may return +if it finds an error in the pattern. There are also some negative error codes +that are used for invalid UTF strings when validity checking is in force. These +are the same as given by pcre2_match() and pcre2_dfa_match(), and +are described in the +pcre2unicode +documentation. There is no separate documentation for the positive error codes, +because the textual error messages that are obtained by calling the +pcre2_get_error_message() function (see "Obtaining a textual error +message" +below) +should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined +for both positive and negative error codes in pcre2.h. When compilation +is successful errorcode is set to a value that returns the message "no +error" if passed to pcre2_get_error_message(). +

+

+The value returned in erroroffset is an indication of where in the +pattern an error occurred. When there is no error, zero is returned. A non-zero +value is not necessarily the furthest point in the pattern that was read. For +example, after the error "lookbehind assertion is not fixed length", the error +offset points to the start of the failing assertion. For an invalid UTF-8 or +UTF-16 string, the offset is that of the first code unit of the failing +character. +

+

+Some errors are not detected until the whole pattern has been scanned; in these +cases, the offset passed back is the length of the pattern. Note that the +offset is in code units, not characters, even in a UTF mode. It may sometimes +point into the middle of a UTF-8 or UTF-16 character. +

+

+This code fragment shows a typical straightforward call to +pcre2_compile(): +

+  pcre2_code *re;
+  PCRE2_SIZE erroffset;
+  int errorcode;
+  re = pcre2_compile(
+    "^A.*Z",                /* the pattern */
+    PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */
+    0,                      /* default options */
+    &errorcode,             /* for error code */
+    &erroffset,             /* for error offset */
+    NULL);                  /* no compile context */
+
+
+

+
+Main compile options +
+

+The following names for option bits are defined in the pcre2.h header +file: +

+  PCRE2_ANCHORED
+
+If this bit is set, the pattern is forced to be "anchored", that is, it is +constrained to match only at the first matching point in the string that is +being searched (the "subject string"). This effect can also be achieved by +appropriate constructs in the pattern itself, which is the only way to do it in +Perl. +
+  PCRE2_ALLOW_EMPTY_CLASS
+
+By default, for compatibility with Perl, a closing square bracket that +immediately follows an opening one is treated as a data character for the +class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which +therefore contains no characters and so can never match. +
+  PCRE2_ALT_BSUX
+
+This option requests alternative handling of three escape sequences, which +makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: +

+

+(1) \U matches an upper case "U" character; by default \U causes a compile +time error (Perl uses \U to upper case subsequent characters). +

+

+(2) \u matches a lower case "u" character unless it is followed by four +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, \u causes a compile time error (Perl uses it to upper +case the following character). +

+

+(3) \x matches a lower case "x" character unless it is followed by two +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, as in Perl, a hexadecimal number is always expected after +\x, but it may have zero, one, or two digits (so, for example, \xz matches a +binary zero character followed by z). +

+

+ECMAscript 6 added additional functionality to \u. This can be accessed using +the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options" +below). +Note that this alternative escape handling applies only to patterns. Neither of +these options affects the processing of replacement strings passed to +pcre2_substitute(). +

+  PCRE2_ALT_CIRCUMFLEX
+
+In multiline mode (when PCRE2_MULTILINE is set), the circumflex metacharacter +matches at the start of the subject (unless PCRE2_NOTBOL is set), and also +after any internal newline. However, it does not match after a newline at the +end of the subject, for compatibility with Perl. If you want a multiline +circumflex also to match after a terminating newline, you must set +PCRE2_ALT_CIRCUMFLEX. +
+  PCRE2_ALT_EXTENDED_CLASS
+
+Alters the parsing of character classes to follow the extended syntax +described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact +on the behaviour of the Perl-specific "(?[...])" syntax for extended classes, +but instead enables the alternative syntax of extended class behaviour inside +ordinary "[...]" character classes. See the +pcre2pattern +documentation for details of the character classes supported. +
+  PCRE2_ALT_VERBNAMES
+
+By default, for compatibility with Perl, the name in any verb sequence such as +(*MARK:NAME) is any sequence of characters that does not include a closing +parenthesis. The name is not processed in any way, and it is not possible to +include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES +option is set, normal backslash processing is applied to verb names and only an +unescaped closing parenthesis terminates the name. A closing parenthesis can be +included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED +or PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped +whitespace in verb names is skipped and #-comments are recognized, exactly as +in the rest of the pattern. +
+  PCRE2_AUTO_CALLOUT
+
+If this bit is set, pcre2_compile() automatically inserts callout items, +all with number 255, before each pattern item, except immediately before or +after an explicit callout in the pattern. For discussion of the callout +facility, see the +pcre2callout +documentation. +
+  PCRE2_CASELESS
+
+If this bit is set, letters in the pattern match both upper and lower case +letters in the subject. It is equivalent to Perl's /i option, and it can be +changed within a pattern by a (?i) option setting. If either PCRE2_UTF or +PCRE2_UCP is set, Unicode properties are used for all characters with more than +one other case, and for all characters whose code points are greater than +U+007F. +

+

+Note that there are two ASCII characters, K and S, that, in addition to +their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin +sign) and U+017F (long S) respectively. If you do not want this case +equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT. +

+

+One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +

+

+For lower valued characters with only one other case, a lookup table is used +for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used +for all code points less than 256, and higher code points (available only in +16-bit or 32-bit mode) are treated as not having another case. +

+

+From release 10.45 PCRE2_CASELESS also affects what some of the letter-related +Unicode property escapes (\p and \P) match. The properties Lu (upper case +letter), Ll (lower case letter), and Lt (title case letter) are all treated as +LC (cased letter) when PCRE2_CASELESS is set. +

+  PCRE2_DOLLAR_ENDONLY
+
+If this bit is set, a dollar metacharacter in the pattern matches only at the +end of the subject string. Without this option, a dollar also matches +immediately before a newline at the end of the string (but not before any other +newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is +set. There is no equivalent to this option in Perl, and no way to set it within +a pattern. +
+  PCRE2_DOTALL
+
+If this bit is set, a dot metacharacter in the pattern matches any character, +including one that indicates a newline. However, it only ever matches one +character, even if newlines are coded as CRLF. Without this option, a dot does +not match when the current position in the subject is at a newline. This option +is equivalent to Perl's /s option, and it can be changed within a pattern by a +(?s) option setting. A negative class such as [^a] always matches newline +characters, and the \N escape sequence always matches a non-newline character, +independent of the setting of PCRE2_DOTALL. +
+  PCRE2_DUPNAMES
+
+If this bit is set, names used to identify capture groups need not be unique. +This can be helpful for certain types of pattern when it is known that only one +instance of the named group can ever be matched. There are more details of +named capture groups below; see also the +pcre2pattern +documentation. +
+  PCRE2_ENDANCHORED
+
+If this bit is set, the end of any pattern match must be right at the end of +the string being searched (the "subject string"). If the pattern match +succeeds by reaching (*ACCEPT), but does not reach the end of the subject, the +match fails at the current starting point. For unanchored patterns, a new match +is then tried at the next starting point. However, if the match succeeds by +reaching the end of the pattern, but not the end of the subject, backtracking +occurs and an alternative match may be found. Consider these two patterns: +
+  .(*ACCEPT)|..
+  .|..
+
+If matched against "abc" with PCRE2_ENDANCHORED set, the first matches "c" +whereas the second matches "bc". The effect of PCRE2_ENDANCHORED can also be +achieved by appropriate constructs in the pattern itself, which is the only way +to do it in Perl. +

+

+For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only +to the first (that is, the longest) matched string. Other parallel matches, +which are necessarily substrings of the first one, must obviously end before +the end of the subject. +

+  PCRE2_EXTENDED
+
+If this bit is set, most white space characters in the pattern are totally +ignored except when escaped, inside a character class, or inside a \Q...\E +sequence. However, white space is not allowed within sequences such as (?> that +introduce various parenthesized groups, nor within numerical quantifiers such +as {1,3}. Ignorable white space is permitted between an item and a following +quantifier and between a quantifier and a following + that indicates +possessiveness. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be +changed within a pattern by a (?x) option setting. +

+

+When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as +white space only those characters with code points less than 256 that are +flagged as white space in its low-character table. The table is normally +created by +pcre2_maketables(), +which uses the isspace() function to identify space characters. In most +ASCII environments, the relevant characters are those with code points 0x0009 +(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D +(carriage return), and 0x0020 (space). +

+

+When PCRE2 is compiled with Unicode support, in addition to these characters, +five more Unicode "Pattern White Space" characters are recognized by +PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-right mark), +U+200F (right-to-left mark), U+2028 (line separator), and U+2029 (paragraph +separator). This set of characters is the same as recognized by Perl's /x +option. Note that the horizontal and vertical space characters that are matched +by the \h and \v escapes in patterns are a much bigger set. +

+

+As well as ignoring most white space, PCRE2_EXTENDED also causes characters +between an unescaped # outside a character class and the next newline, +inclusive, to be ignored, which makes it possible to include comments inside +complicated patterns. Note that the end of this type of comment is a literal +newline sequence in the pattern; escape sequences that happen to represent a +newline do not count. +

+

+Which characters are interpreted as newlines can be specified by a setting in +the compile context that is passed to pcre2_compile() or by a special +sequence at the start of the pattern, as described in the section entitled +"Newline conventions" +in the pcre2pattern documentation. A default is defined when PCRE2 is +built. +

+  PCRE2_EXTENDED_MORE
+
+This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space +and horizontal tab characters are ignored inside a character class. Note: only +these two characters are ignored, not the full set of pattern white space +characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is +equivalent to Perl's /xx option, and it can be changed within a pattern by a +(?xx) option setting. +
+  PCRE2_FIRSTLINE
+
+If this option is set, the start of an unanchored pattern match must be before +or at the first newline in the subject string following the start of matching, +though the matched text may continue over the newline. If startoffset is +non-zero, the limiting newline is not necessarily the first newline in the +subject. For example, if the subject string is "abc\nxyz" (where \n +represents a single-character newline) a pattern match for "yz" succeeds with +PCRE2_FIRSTLINE if startoffset is greater than 3. See also +PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If +PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first +line and also within the offset limit. In other words, whichever limit comes +first is used. This option has no effect for anchored patterns. +
+  PCRE2_LITERAL
+
+If this option is set, all meta-characters in the pattern are disabled, and it +is treated as a literal string. Matching literal strings with a regular +expression engine is not the most efficient way of doing it. If you are doing a +lot of literal matching and are worried about efficiency, you should consider +using other approaches. The only other main options that are allowed with +PCRE2_LITERAL are: PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, +PCRE2_CASELESS, PCRE2_FIRSTLINE, PCRE2_MATCH_INVALID_UTF, +PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_UTF, and +PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and +PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error. +
+  PCRE2_MATCH_INVALID_UTF
+
+This option forces PCRE2_UTF (see below) and also enables support for matching +by pcre2_match() in subject strings that contain invalid UTF sequences. +Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. This facility is not supported for DFA matching. For details, +see the +pcre2unicode +documentation. +
+  PCRE2_MATCH_UNSET_BACKREF
+
+If this option is set, a backreference to an unset capture group matches an +empty string (by default this causes the current matching alternative to fail). +A pattern such as (\1)(a) succeeds when this option is set (assuming it can +find an "a" in the subject), whereas it fails by default, for Perl +compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka +JavaScript). +
+  PCRE2_MULTILINE
+
+By default, for the purposes of matching "start of line" and "end of line", +PCRE2 treats the subject string as consisting of a single line of characters, +even if it actually contains newlines. The "start of line" metacharacter (^) +matches only at the start of the string, and the "end of line" metacharacter +($) matches only at the end of the string, or before a terminating newline +(except when PCRE2_DOLLAR_ENDONLY is set). Note, however, that unless +PCRE2_DOTALL is set, the "any character" metacharacter (.) does not match at a +newline. This behaviour (for ^, $, and dot) is the same as Perl. +

+

+When PCRE2_MULTILINE is set, the "start of line" and "end of line" +constructs match immediately following or immediately before internal newlines +in the subject string, respectively, as well as at the very start and end. This +is equivalent to Perl's /m option, and it can be changed within a pattern by a +(?m) option setting. Note that the "start of line" metacharacter does not match +after a newline at the end of the subject, for compatibility with Perl. +However, you can change this by setting the PCRE2_ALT_CIRCUMFLEX option. If +there are no newlines in a subject string, or no occurrences of ^ or $ in a +pattern, setting PCRE2_MULTILINE has no effect. +

+  PCRE2_NEVER_BACKSLASH_C
+
+This option locks out the use of \C in the pattern that is being compiled. +This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because +it may leave the current matching point in the middle of a multi-code-unit +character. This option may be useful in applications that process patterns from +external sources. Note that there is also a build-time option that permanently +locks out the use of \C. +
+  PCRE2_NEVER_UCP
+
+This option locks out the use of Unicode properties for handling \B, \b, \D, +\d, \S, \s, \W, \w, and some of the POSIX character classes, as described +for the PCRE2_UCP option below. In particular, it prevents the creator of the +pattern from enabling this facility by starting the pattern with (*UCP). This +option may be useful in applications that process patterns from external +sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. +
+  PCRE2_NEVER_UTF
+
+This option locks out interpretation of the pattern as UTF-8, UTF-16, or +UTF-32, depending on which library is in use. In particular, it prevents the +creator of the pattern from switching to UTF interpretation by starting the +pattern with (*UTF). This option may be useful in applications that process +patterns from external sources. The combination of PCRE2_UTF and +PCRE2_NEVER_UTF causes an error. +
+  PCRE2_NO_AUTO_CAPTURE
+
+If this option is set, it disables the use of numbered capturing parentheses in +the pattern. Any opening parenthesis that is not followed by ? behaves as if it +were followed by ?: but named parentheses can still be used for capturing (and +they acquire numbers in the usual way). This is the same as Perl's /n option. +Note that, when this option is set, references to capture groups +(backreferences or recursion/subroutine calls) may only refer to named groups, +though the reference can be by name or by number. +
+  PCRE2_NO_AUTO_POSSESS
+
+If this (deprecated) option is set, it disables "auto-possessification", which +is an optimization that, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +set this option if you want the matching functions to do a full unoptimized +search and run all the callouts, but it is mainly provided for testing +purposes. +

+

+If a compile context is available, it is recommended to use +pcre2_set_optimize() with the directive PCRE2_AUTO_POSSESS_OFF rather +than the compile option PCRE2_NO_AUTO_POSSESS. Note that PCRE2_NO_AUTO_POSSESS +takes precedence over the pcre2_set_optimize() optimization directives +PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF. +

+  PCRE2_NO_DOTSTAR_ANCHOR
+
+If this (deprecated) option is set, it disables an optimization that is applied +when .* is the first significant item in a top-level branch of a pattern, and +all the other branches also start with .* or with \A or \G or ^. The +optimization is automatically disabled for .* if it is inside an atomic group +or a capture group that is the subject of a backreference, or if the pattern +contains (*PRUNE) or (*SKIP). When the optimization is not disabled, such a +pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items +and PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any +match must start either at the start of the subject or following a newline is +remembered. Like other optimizations, this can cause callouts to be skipped. +(If a compile context is available, it is recommended to use +pcre2_set_optimize() with the directive PCRE2_DOTSTAR_ANCHOR_OFF +instead.) +
+  PCRE2_NO_START_OPTIMIZE
+
+This is an option whose main effect is at matching time. It does not change +what pcre2_compile() generates, but it does affect the output of the JIT +compiler. Setting this option is equivalent to calling pcre2_set_optimize() +with the directive parameter set to PCRE2_START_OPTIMIZE_OFF. +

+

+There are a number of optimizations that may occur at the start of a match, in +order to speed up the process. For example, if it is known that an unanchored +match must start with a specific code unit value, the matching code searches +the subject for that value, and fails immediately if it cannot find it, without +actually running the main matching function. The start-up optimizations are +in effect a pre-scan of the subject that takes place before the pattern is run. +

+

+Disabling the start-up optimizations may cause performance to suffer. However, +this may be desirable for patterns which contain callouts or items such as +(*COMMIT) and (*MARK). See the above description of PCRE2_START_OPTIMIZE_OFF +for further details. +

+  PCRE2_NO_UTF_CHECK
+
+When PCRE2_UTF is set, the validity of the pattern as a UTF string is +automatically checked. There are discussions about the validity of +UTF-8 strings, +UTF-16 strings, +and +UTF-32 strings +in the +pcre2unicode +document. If an invalid UTF sequence is found, pcre2_compile() returns a +negative error code. +

+

+If you know that your pattern is a valid UTF string, and you want to skip this +check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When +it is set, the effect of passing an invalid UTF string as a pattern is +undefined. It may cause your program to crash or loop. +

+

+Note that this option can also be passed to pcre2_match() and +pcre2_dfa_match(), to suppress UTF validity checking of the subject +string. +

+

+Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the +error that is given if an escape sequence for an invalid Unicode code point is +encountered in the pattern. In particular, the so-called "surrogate" code +points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences +such as \x{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option, as described in the section entitled "Extra compile options" +below. +However, this is possible only in UTF-8 and UTF-32 modes, because these values +are not representable in UTF-16. +

+  PCRE2_UCP
+
+This option has two effects. Firstly, it changes the way PCRE2 processes \B, +\b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes. By +default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode +properties are used to classify characters. There are some PCRE2_EXTRA +options (see below) that add finer control to this behaviour. More details are +given in the section on +generic character types +in the +pcre2pattern +page. +

+

+The second effect of PCRE2_UCP is to force the use of Unicode properties for +upper/lower casing operations, even when PCRE2_UTF is not set. This makes it +possible to process strings in the 16-bit UCS-2 code. This option is available +only if PCRE2 has been compiled with Unicode support (which is the default). +

+

+The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless +matching such that ASCII characters match only ASCII characters and non-ASCII +characters match only non-ASCII characters. The PCRE2_EXTRA_TURKISH_CASING option +(see above) alters the matching of the 'i' characters to follow their behaviour +in Turkish and Azeri languages. For further details on +PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the +pcre2unicode +page. +

+  PCRE2_UNGREEDY
+
+This option inverts the "greediness" of the quantifiers so that they are not +greedy by default, but become greedy if followed by "?". It is not compatible +with Perl. It can also be set by a (?U) option setting within the pattern. +
+  PCRE2_USE_OFFSET_LIMIT
+
+This option must be set for pcre2_compile() if +pcre2_set_offset_limit() is going to be used to set a non-default offset +limit in a match context for matches that use this pattern. An error is +generated if an offset limit is set without this option. For more details, see +the description of pcre2_set_offset_limit() in the +section +that describes match contexts. See also the PCRE2_FIRSTLINE +option above. +
+  PCRE2_UTF
+
+This option causes PCRE2 to regard both the pattern and the subject strings +that are subsequently processed as strings of UTF characters instead of +single-code-unit strings. It is available when PCRE2 is built to include +Unicode support (which is the default). If Unicode support is not available, +the use of this option provokes an error. Details of how PCRE2_UTF changes the +behaviour of PCRE2 are given in the +pcre2unicode +page. In particular, note that it changes the way PCRE2_CASELESS works. +

+
+Extra compile options +
+

+The option bits that can be set in a compile context by calling the +pcre2_set_compile_extra_options() function are as follows: +

+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+
+Since release 10.38 PCRE2 has forbidden the use of \K within lookaround +assertions, following Perl's lead. This option is provided to re-enable the +previous behaviour (act in positive lookarounds, ignore in negative ones) in +case anybody is relying on it. +
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+
+This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is +forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate" +code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode +code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot +therefore be represented in UTF-16. They can be represented in UTF-8 and +UTF-32, but are defined as invalid code points, and cause errors if encountered +in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. +

+

+These values also cause errors if encountered in escape sequences such as +\x{d912} within a pattern. However, it seems that some applications, when +using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test +for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does +not disable the error that occurs, because it applies only to the testing of +input strings for UTF validity. +

+

+If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code +point values in UTF-8 and UTF-32 patterns no longer provoke errors and are +incorporated in the compiled pattern. However, they can only match subject +characters if the matching function is called with PCRE2_NO_UTF_CHECK set. +

+  PCRE2_EXTRA_ALT_BSUX
+
+The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in +the way that ECMAscript (aka JavaScript) does. Additional functionality was +defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of +PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal +character code, where hhh.. is any number of hexadecimal digits. +
+  PCRE2_EXTRA_ASCII_BSD
+
+This option forces \d to match only ASCII digits, even when PCRE2_UCP is set. +It can be changed within a pattern by means of the (?aD) option setting. +
+  PCRE2_EXTRA_ASCII_BSS
+
+This option forces \s to match only ASCII space characters, even when +PCRE2_UCP is set. It can be changed within a pattern by means of the (?aS) +option setting. +
+  PCRE2_EXTRA_ASCII_BSW
+
+This option forces \w to match only ASCII word characters, even when PCRE2_UCP +is set. It can be changed within a pattern by means of the (?aW) option +setting. +
+  PCRE2_EXTRA_ASCII_DIGIT
+
+This option forces the POSIX character classes [:digit:] and [:xdigit:] to +match only ASCII digits, even when PCRE2_UCP is set. It can be changed within +a pattern by means of the (?aT) option setting. +
+  PCRE2_EXTRA_ASCII_POSIX
+
+This option forces all the POSIX character classes, including [:digit:] and +[:xdigit:], to match only ASCII characters, even when PCRE2_UCP is set. It can +be changed within a pattern by means of the (?aP) option setting, but note that +this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets +all ASCII restrictions for POSIX classes. +
+  PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
+
+This is a dangerous option. Use with care. By default, an unrecognized escape +such as \j or a malformed one such as \x{2z} causes a compile-time error when +detected by pcre2_compile(). Perl is somewhat inconsistent in handling +such items: for example, \j is treated as a literal "j", and non-hexadecimal +digits in \x{} are just ignored, though warnings are given in both cases if +Perl's warning switch is enabled. However, a malformed octal number after \o{ +always causes an error in Perl. +

+

+If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to +pcre2_compile(), all unrecognized or malformed escape sequences are +treated as single-character escapes. For example, \j is a literal "j" and +\x{2z} is treated as the literal string "x{2z}". Setting this option means +that typos in patterns may go undetected and have unexpected results. Also note +that a sequence such as [\N{] is interpreted as a malformed attempt at +[\N{...}] and so is treated as [N{] whereas [\N] gives an error because an +unqualified \N is a valid escape sequence but is not supported in a character +class. To reiterate: this is a dangerous option. Use with great care. +

+  PCRE2_EXTRA_CASELESS_RESTRICT
+
+When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode +rules, which allow for more than two cases per character. There are two +case-equivalent character sets that contain both ASCII and non-ASCII +characters. The ASCII letter S is case-equivalent to U+017f (long S) and the +ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables +recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a +caseless match, the two characters must either both be ASCII or both be non-ASCII. The option +can be changed within a pattern by the (*CASELESS_RESTRICT) or (?r) option +settings. +
+  PCRE2_EXTRA_ESCAPED_CR_IS_LF
+
+There are some legacy applications where the escape sequence \r in a pattern +is expected to match a newline. If this option is set, \r in a pattern is +converted to \n so that it matches a LF (linefeed) instead of a CR (carriage +return) character. The option does not affect a literal CR in the pattern, nor +does it affect CR specified as an explicit code point such as \x{0D}. +
+  PCRE2_EXTRA_MATCH_LINE
+
+This option is provided for use by the -x option of pcre2grep. It +causes the pattern only to match complete lines. This is achieved by +automatically inserting the code for "^(?:" at the start of the compiled +pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched +line may be in the middle of the subject string. This option can be used with +PCRE2_LITERAL. +
+  PCRE2_EXTRA_MATCH_WORD
+
+This option is provided for use by the -w option of pcre2grep. It +causes the pattern only to match strings that have a word boundary at the start +and the end. This is achieved by automatically inserting the code for "\b(?:" +at the start of the compiled pattern and ")\b" at the end. The option may be +used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is +also set. +
+  PCRE2_EXTRA_NO_BS0
+
+If this option is set (note that its final character is the digit 0) it locks +out the use of the sequence \0 unless at least one more octal digit follows. +
+  PCRE2_EXTRA_PYTHON_OCTAL
+
+If this option is set, PCRE2 follows Python's rules for interpreting octal +escape sequences. The rules for handling sequences such as \14, which could +be an octal number or a back reference, are different. Details are given in the +pcre2pattern +documentation. +
+  PCRE2_EXTRA_NEVER_CALLOUT
+
+If this option is set, PCRE2 treats callouts in the pattern as a syntax error, +returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the application +knows that a callout will not be provided to pcre2_match(), so that +callouts in the pattern are not silently ignored. +
+  PCRE2_EXTRA_TURKISH_CASING
+
+This option alters case-equivalence of the 'i' letters to follow the +alphabet used by Turkish and Azeri languages. The option can be changed within +a pattern by the (*TURKISH_CASING) start-of-pattern setting. Either the UTF or +UCP options must be set. In the 8-bit library, UTF must be set. This option +cannot be combined with PCRE2_EXTRA_CASELESS_RESTRICT. +

+
JUST-IN-TIME (JIT) COMPILATION
+

+int pcre2_jit_compile(pcre2_code *code, uint32_t options); +
+
+int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +
+
+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +
+
+pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); +
+
+void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); +
+
+void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); +

+

+These functions provide support for JIT compilation, which, if the just-in-time +compiler is available, further processes a compiled pattern into machine code +that executes much faster than the pcre2_match() interpretive matching +function. Full details are given in the +pcre2jit +documentation. +

+

+JIT compilation is a heavyweight optimization. It can take some time for +patterns to be analyzed, and for one-off matches and simple patterns the +benefit of faster execution might be offset by a much slower compilation time. +Most (but not all) patterns can be optimized by the JIT compiler. +

+
LOCALE SUPPORT
+

+const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); +
+
+void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); +

+

+PCRE2 handles caseless matching, and determines whether characters are letters, +digits, or whatever, by reference to a set of tables, indexed by character code +point. However, this applies only to characters whose code points are less than +256. By default, higher-valued code points never match escapes such as \w or +\d. +

+

+When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \p and \P, or, alternatively, the +PCRE2_UCP option can be set when a pattern is compiled; this causes \w and +friends to use Unicode property support instead of the built-in tables. +PCRE2_UCP also causes upper/lower casing operations on characters with code +points greater than 127 to use Unicode properties. These effects apply even +when PCRE2_UTF is not set. There are, however, some PCRE2_EXTRA options (see +above) that can be used to modify or suppress them. +

+

+The use of locales with Unicode is discouraged. If you are handling characters +with code points greater than 127, you should either use Unicode support, or +use locales, but not try to mix the two. +

+

+PCRE2 contains a built-in set of character tables that are used by default. +These are sufficient for many applications. Normally, the internal tables +recognize only ASCII characters. However, when PCRE2 is built, it is possible +to cause the internal tables to be rebuilt in the default "C" locale of the +local system, which may cause them to be different. +

+

+The built-in tables can be overridden by tables supplied by the application +that calls PCRE2. These may be created in a different locale from the default. +As more and more applications change to using Unicode, the need for this locale +support is expected to die away. +

+

+External tables are built by calling the pcre2_maketables() function, in +the relevant locale. The only argument to this function is a general context, +which can be used to pass a custom memory allocator. If the argument is NULL, +the system malloc() is used. The result can be passed to +pcre2_compile() as often as necessary, by creating a compile context and +calling pcre2_set_character_tables() to set the tables pointer therein. +

+

+For example, to build and use tables that are appropriate for the French locale +(where accented characters with values greater than 127 are treated as +letters), the following code could be used: +

+  setlocale(LC_CTYPE, "fr_FR");
+  tables = pcre2_maketables(NULL);
+  ccontext = pcre2_compile_context_create(NULL);
+  pcre2_set_character_tables(ccontext, tables);
+  re = pcre2_compile(..., ccontext);
+
+The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +are using Windows, the name for the French locale is "french". +

+

+The pointer that is passed (via the compile context) to pcre2_compile() +is saved with the compiled pattern, and the same tables are used by the +matching functions. Thus, for any single pattern, compilation and matching both +happen in the same locale, but different patterns can be processed in different +locales. +

+

+It is the caller's responsibility to ensure that the memory containing the +tables remains available while they are still in use. When they are no longer +needed, you can discard them using pcre2_maketables_free(), which should +be passed, as its first parameter, the same general context that was used to create the +tables. +

+
+Saving locale tables +
+

+The tables described above are just a sequence of binary bytes, which makes +them independent of hardware characteristics such as endianness or whether the +processor is 32-bit or 64-bit. A copy of the result of pcre2_maketables() +can therefore be saved in a file or elsewhere and re-used later, even in a +different program or on another computer. The size of the tables (number of +bytes) must be obtained by calling pcre2_config() with the +PCRE2_CONFIG_TABLES_LENGTH option because pcre2_maketables() does not +return this value. Note that the pcre2_dftables program, which is part of +the PCRE2 build system, can be used stand-alone to create a file that contains +a set of binary tables. See the +pcre2build +documentation for details. +

+
INFORMATION ABOUT A COMPILED PATTERN
+

+int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where); +

+

+The pcre2_pattern_info() function returns general information about a +compiled pattern. For information about callouts, see the +next section. +The first argument for pcre2_pattern_info() is a pointer to the compiled +pattern. The second argument specifies which piece of information is required, +and the third argument is a pointer to a variable to receive the data. If the +third argument is NULL, the first argument is ignored, and the function returns +the size in bytes of the variable that is required for the information +requested. Otherwise, the yield of the function is zero for success, or one of +the following negative numbers: +

+  PCRE2_ERROR_NULL           the argument code was NULL
+  PCRE2_ERROR_BADMAGIC       the "magic number" was not found
+  PCRE2_ERROR_BADOPTION      the value of what was invalid
+  PCRE2_ERROR_UNSET          the requested field is not set
+
+The "magic number" is placed at the start of each compiled pattern as a simple +check against passing an arbitrary memory pointer. Here is a typical call of +pcre2_pattern_info(), to obtain the length of the compiled pattern: +
+  int rc;
+  size_t length;
+  rc = pcre2_pattern_info(
+    re,               /* result of pcre2_compile() */
+    PCRE2_INFO_SIZE,  /* what is required */
+    &length);         /* where to put the data */
+
+The possible values for the second argument are defined in pcre2.h, and +are as follows: +
+  PCRE2_INFO_ALLOPTIONS
+  PCRE2_INFO_ARGOPTIONS
+  PCRE2_INFO_EXTRAOPTIONS
+
+Return copies of the pattern's options. The third argument should point to a +uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that +were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOPTIONS returns +the compile options as modified by any top-level (*XXX) option settings such as +(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the +extra options that were set in the compile context by calling the +pcre2_set_compile_extra_options() function. +

+

+For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED +option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. +Option settings such as (?i) that can change within a pattern do not affect the +result of PCRE2_INFO_ALLOPTIONS, even if they appear right at the start of the +pattern. (This was different in some earlier releases.) +

+

+A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if +the first significant item in every top-level branch is one of the following: +

+  ^     unless PCRE2_MULTILINE is set
+  \A    always
+  \G    always
+  .*    sometimes - see below
+
+When .* is the first significant item, anchoring is possible only when all the +following are true: +
+  .* is not in an atomic group
+  .* is not in a capture group that is the subject of a backreference
+  PCRE2_DOTALL is in force for .*
+  Neither (*PRUNE) nor (*SKIP) appears in the pattern
+  PCRE2_NO_DOTSTAR_ANCHOR is not set
+  Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF
+
+For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the +options returned for PCRE2_INFO_ALLOPTIONS. +
+  PCRE2_INFO_BACKREFMAX
+
+Return the number of the highest backreference in the pattern. The third +argument should point to a uint32_t variable. Named capture groups +acquire numbers as well as names, and these count towards the highest +backreference. Backreferences such as \4 or \g{12} match the captured +characters of the given group, but in addition, the check that a capture +group is set in a conditional group such as (?(3)a|b) is also a backreference. +Zero is returned if there are no backreferences. +
+  PCRE2_INFO_BSR
+
+The output is a uint32_t integer whose value indicates what character sequences +the \R escape sequence matches. A value of PCRE2_BSR_UNICODE means that \R +matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means +that \R matches only CR, LF, or CRLF. +
+  PCRE2_INFO_CAPTURECOUNT
+
+Return the highest capture group number in the pattern. In patterns where (?| +is not used, this is also the total number of capture groups. The third +argument should point to a uint32_t variable. +
+  PCRE2_INFO_DEPTHLIMIT
+
+If the pattern set a backtracking depth limit by including an item of the form +(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +
+  PCRE2_INFO_FIRSTBITMAP
+
+In the absence of a single first code unit for a non-anchored pattern, +pcre2_compile() may construct a 256-bit table that defines a fixed set of +values for the first code unit in any match. For example, a pattern that starts +with [abc] results in a table with three bits set. When code unit values +greater than 255 are supported, the flag bit for 255 means "any code unit of +value 255 or above". If such a table was constructed, a pointer to it is +returned. Otherwise NULL is returned. The third argument should point to a +const uint8_t * variable. +
+  PCRE2_INFO_FIRSTCODETYPE
+
+Return information about the first code unit of any matched string, for a +non-anchored pattern. The third argument should point to a uint32_t +variable. If there is a fixed first value, for example, the letter "c" from a +pattern such as (cat|cow|coyote), 1 is returned, and the value can be retrieved +using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is +known that a match can occur only at the start of the subject or following a +newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0 +is returned. +
+  PCRE2_INFO_FIRSTCODEUNIT
+
+Return the value of the first code unit of any matched string for a pattern +where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third +argument should point to a uint32_t variable. In the 8-bit library, the +value is always less than 256. In the 16-bit library the value can be up to +0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff, +and up to 0xffffffff when not using UTF-32 mode. +
+  PCRE2_INFO_FRAMESIZE
+
+Return the size (in bytes) of the data frames that are used to remember +backtracking positions when the pattern is processed by pcre2_match() +without the use of JIT. The third argument should point to a size_t +variable. The frame size depends on the number of capturing parentheses in the +pattern. Each additional capture group adds two PCRE2_SIZE variables. +
+  PCRE2_INFO_HASBACKSLASHC
+
+Return 1 if the pattern contains any instances of \C, otherwise 0. The third +argument should point to a uint32_t variable. +
+  PCRE2_INFO_HASCRORLF
+
+Return 1 if the pattern contains any explicit matches for CR or LF characters, +otherwise 0. The third argument should point to a uint32_t variable. An +explicit match is either a literal CR or LF character, or \r or \n or one of +the equivalent hexadecimal or octal escape sequences. +
+  PCRE2_INFO_HEAPLIMIT
+
+If the pattern set a heap memory limit by including an item of the form +(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +
+  PCRE2_INFO_JCHANGED
+
+Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise +0. The third argument should point to a uint32_t variable. (?J) and +(?-J) set and unset the local PCRE2_DUPNAMES option, respectively. +
+  PCRE2_INFO_JITSIZE
+
+If the compiled pattern was successfully processed by +pcre2_jit_compile(), return the size of the JIT compiled code, otherwise +return zero. The third argument should point to a size_t variable. +
+  PCRE2_INFO_LASTCODETYPE
+
+Return 1 if there is a rightmost literal code unit that must exist in any +matched string, other than at its start. The third argument should point to a +uint32_t variable. If there is no such value, 0 is returned. When 1 is +returned, the code unit value itself can be retrieved using +PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is +recorded only if it follows something of variable length. For example, for the +pattern /^a\d+z\d+/ the returned value is 1 (with "z" returned from +PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0. +
+  PCRE2_INFO_LASTCODEUNIT
+
+Return the value of the rightmost literal code unit that must exist in any +matched string, other than at its start, for a pattern where +PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argument +should point to a uint32_t variable. +
+  PCRE2_INFO_MATCHEMPTY
+
+Return 1 if the pattern might match an empty string, otherwise 0. The third +argument should point to a uint32_t variable. When a pattern contains +recursive subroutine calls it is not always possible to determine whether or +not it can match an empty string. PCRE2 takes a cautious approach and returns 1 +in such cases. +
+  PCRE2_INFO_MATCHLIMIT
+
+If the pattern set a match limit by including an item of the form +(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +
+  PCRE2_INFO_MAXLOOKBEHIND
+
+A lookbehind assertion moves back a certain number of characters (not code +units) when it starts to process each of its branches. This request returns the +largest of these backward moves. The third argument should point to a uint32_t +integer. The simple assertions \b and \B require a one-character lookbehind +and cause PCRE2_INFO_MAXLOOKBEHIND to return 1 in the absence of anything +longer. \A also registers a one-character lookbehind, though it does not +actually inspect the previous character. +

+

+Note that this information is useful for multi-segment matching only +if the pattern contains no nested lookbehinds. For example, the pattern +(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the +first lookbehind moves back by two characters, matches one character, then the +nested lookbehind also moves back by two characters. This puts the matching +point three characters earlier than it was at the start. +PCRE2_INFO_MAXLOOKBEHIND is really only useful as a debugging tool. See the +pcre2partial +documentation for a discussion of multi-segment matching. +

+  PCRE2_INFO_MINLENGTH
+
+If a minimum length for matching subject strings was computed, its value is +returned. Otherwise the returned value is 0. This value is not computed when +PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in +UTF mode may be different from the number of code units. The third argument +should point to a uint32_t variable. The value is a lower bound to the +length of any matching string. There may not be any strings of that length that +do actually match, but every string that does match is at least that long. +
+  PCRE2_INFO_NAMECOUNT
+  PCRE2_INFO_NAMEENTRYSIZE
+  PCRE2_INFO_NAMETABLE
+
+PCRE2 supports the use of named as well as numbered capturing parentheses. The +names are just an additional way of identifying the parentheses, which still +acquire numbers. Several convenience functions such as +pcre2_substring_get_byname() are provided for extracting captured +substrings by name. It is also possible to extract the data directly, by first +converting the name to a number in order to access the correct pointers in the +output vector (described with pcre2_match() below). To do the conversion, +you need to use the name-to-number map, which is described by these three +values. +

+

+The map consists of a number of fixed-size entries. PCRE2_INFO_NAMECOUNT gives +the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives the size of each +entry in code units; both of these return a uint32_t value. The entry +size depends on the length of the longest name. +

+

+PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table. This is +a PCRE2_SPTR pointer to a block of code units. In the 8-bit library, the first +two bytes of each entry are the number of the capturing parenthesis, most +significant byte first. In the 16-bit library, the pointer points to 16-bit +code units, the first of which contains the parenthesis number. In the 32-bit +library, the pointer points to 32-bit code units, the first of which contains +the parenthesis number. The rest of the entry is the corresponding name, zero +terminated. +

+

+The names are in alphabetical order. If (?| is used to create multiple capture +groups with the same number, as described in the +section on duplicate group numbers +in the +pcre2pattern +page, the groups may be given the same name, but there is only one entry in the +table. Different names for groups of the same number are not permitted. +

+

+Duplicate names for capture groups with different numbers are permitted, but +only if PCRE2_DUPNAMES is set. They appear in the table in the order in which +they were found in the pattern. In the absence of (?| this is the order of +increasing number; when (?| is used this is not necessarily the case because +later capture groups may have lower numbers. +

+

+As a simple example of the name/number table, consider the following pattern +after compilation by the 8-bit library (assume PCRE2_EXTENDED is set, so white +space - including newlines - is ignored): +

+  (?<date> (?<year>(\d\d)?\d\d) - (?<month>\d\d) - (?<day>\d\d) )
+
+There are four named capture groups, so the table has four entries, and each +entry in the table is eight bytes long. The table is as follows, with +non-printing bytes shown in hexadecimal, and undefined bytes shown as ??: +
+  00 01 d  a  t  e  00 ??
+  00 05 d  a  y  00 ?? ??
+  00 04 m  o  n  t  h  00
+  00 02 y  e  a  r  00 ??
+
+When writing code to extract data from named capture groups using the +name-to-number map, remember that the length of the entries is likely to be +different for each compiled pattern. +
+  PCRE2_INFO_NEWLINE
+
+The output is one of the following uint32_t values: +
+  PCRE2_NEWLINE_CR       Carriage return (CR)
+  PCRE2_NEWLINE_LF       Linefeed (LF)
+  PCRE2_NEWLINE_CRLF     Carriage return, linefeed (CRLF)
+  PCRE2_NEWLINE_ANY      Any Unicode line ending
+  PCRE2_NEWLINE_ANYCRLF  Any of CR, LF, or CRLF
+  PCRE2_NEWLINE_NUL      The NUL character (binary zero)
+
+This identifies the character sequence that will be recognized as meaning +"newline" while matching. +
+  PCRE2_INFO_SIZE
+
+Return the size of the compiled pattern in bytes (for all three libraries). The +third argument should point to a size_t variable. This value includes the +size of the general data block that precedes the code units of the compiled +pattern itself. The value that is used when pcre2_compile() is getting +memory in which to place the compiled pattern may be slightly larger than the +value returned by this option, because there are cases where the code that +calculates the size has to over-estimate. Processing a pattern with the JIT +compiler does not alter the value returned by this option. +

+
INFORMATION ABOUT A PATTERN'S CALLOUTS
+

+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +
+
+A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling pcre2_callout_enumerate(). The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +user_data value that was passed to pcre2_callout_enumerate(). The +contents of the callout enumeration block are described in the +pcre2callout +documentation, which also gives further details about callouts. +

+
SERIALIZATION AND PRECOMPILING
+

+It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. The host on which the patterns are +reloaded must be running the same version of PCRE2, with the same code unit +width, and must also have the same endianness, pointer width, and PCRE2_SIZE +type. Before compiled patterns can be saved, they must be converted to a +"serialized" form, which in the case of PCRE2 is really just a bytecode dump. +The functions whose names begin with pcre2_serialize_ are used for +converting to and from the serialized form. They are described in the +pcre2serialize +documentation. Note that PCRE2 serialization does not convert compiled patterns +to an abstract format like Java or .NET serialization. +

+
THE MATCH DATA BLOCK
+

+pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); +
+
+pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); +
+
+void pcre2_match_data_free(pcre2_match_data *match_data); +

+

+Information about a successful or unsuccessful match is placed in a match +data block, which is an opaque structure that is accessed by function calls. In +particular, the match data block contains a vector of offsets into the subject +string that define the matched parts of the subject. This is known as the +ovector. +

+

+Before calling pcre2_match(), pcre2_dfa_match(), or +pcre2_jit_match() you must create a match data block by calling one of +the creation functions above. For pcre2_match_data_create(), the first +argument is the number of pairs of offsets in the ovector. +

+

+When using pcre2_match(), one pair of offsets is required to identify the +string that matched the whole pattern, with an additional pair for each +captured substring. For example, a value of 4 creates enough space to record +the matched portion of the subject plus three captured substrings. +

+

+When using pcre2_dfa_match() there may be multiple matched substrings of +different lengths at the same point in the subject. The ovector should be made +large enough to hold as many as are expected. +

+

+A minimum of 1 pair is imposed by pcre2_match_data_create(), so +it is always possible to return the overall matched string in the case of +pcre2_match() or the longest match in the case of +pcre2_dfa_match(). The maximum number of pairs is 65535; if the first +argument of pcre2_match_data_create() is greater than this, 65535 is +used. +

+

+The second argument of pcre2_match_data_create() is a pointer to a +general context, which can specify custom memory management for obtaining the +memory for the match data block. If you are not using custom memory management, +pass NULL, which causes malloc() to be used. +

+

+For pcre2_match_data_create_from_pattern(), the first argument is a +pointer to a compiled pattern. The ovector is created to be exactly the right +size to hold all the substrings a pattern might capture when matched using +pcre2_match(). You should not use this call when matching with +pcre2_dfa_match(). The second argument is again a pointer to a general +context, but in this case if NULL is passed, the memory is obtained using the +same allocator that was used for the compiled pattern (custom or default). +

+

+A match data block can be used many times, with the same or different compiled +patterns. You can extract information from a match data block after a match +operation has finished, using functions that are described in the sections on +matched strings +and +other match data +below. +

+

+When a call of pcre2_match() fails, valid data is available in the match +block only when the error is PCRE2_ERROR_NOMATCH, PCRE2_ERROR_PARTIAL, or one +of the error codes for an invalid UTF string. Exactly what is available depends +on the error, and is detailed below. +

+

+When one of the matching functions is called, pointers to the compiled pattern +and the subject string are set in the match data block so that they can be +referenced by the extraction functions after a successful match. After running +a match, you must not free a compiled pattern or a subject string until after +all operations on the match data block (for that match) have taken place, +unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for pcre2_match()" +below. +

+

+When a match data block itself is no longer needed, it should be freed by +calling pcre2_match_data_free(). If this function is called with a NULL +argument, it returns immediately, without doing anything. +

+
MEMORY USE FOR MATCH DATA BLOCKS
+

+PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); +

+

+The size of a match data block depends on the size of the ovector that it +contains. The function pcre2_get_match_data_size() returns the size, in +bytes, of the block that is its argument. +

+

+When pcre2_match() runs interpretively (that is, without using JIT), it +makes use of a vector of data frames for remembering backtracking positions. +The size of each individual frame depends on the number of capturing +parentheses in the pattern and can be obtained by calling +pcre2_pattern_info() with the PCRE2_INFO_FRAMESIZE option (see the +section entitled "Information about a compiled pattern" +above). +

+

+Heap memory is used for the frames vector; if the initial memory block turns +out to be too small during matching, it is automatically expanded. When +pcre2_match() returns, the memory is not freed, but remains attached to +the match data block, for use by any subsequent matches that use the same +block. It is automatically freed when the match data block itself is freed. +

+

+You can find the current size of the frames vector that a match data block owns +by calling pcre2_get_match_data_heapframes_size(). For a newly created +match data block the size will be zero. Some types of match may require a lot +of frames and thus a large vector; applications that run in environments where +memory is constrained can check this and free the match data block if the heap +frames vector has become too big. +

+
MATCHING A PATTERN: THE TRADITIONAL FUNCTION
+

+int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); +

+

+The function pcre2_match() is called to match a subject string against a +compiled pattern, which is passed in the code argument. You can call +pcre2_match() with the same code argument as many times as you +like, in order to find multiple matches in the subject string or to match +different subject strings with the same pattern. +

+

+This function is the main matching facility of the library, and it operates in +a Perl-like manner. For specialist use there is also an alternative matching +function, which is described +below +in the section about the pcre2_dfa_match() function. +

+

+Here is an example of a simple call to pcre2_match(): +

+  pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+  int rc = pcre2_match(
+    re,             /* result of pcre2_compile() */
+    "some string",  /* the subject string */
+    11,             /* the length of the subject string */
+    0,              /* start at offset 0 in the subject */
+    0,              /* default options */
+    md,             /* the match data block */
+    NULL);          /* a match context; NULL means use defaults */
+
+If the subject string is zero-terminated, the length can be given as +PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common +matching parameters are to be changed. For details, see the section on +the match context +above. +

+
+The string to be matched by pcre2_match() +
+

+The subject string is passed to pcre2_match() as a pointer in +subject, a length in length, and a starting offset in +startoffset. The length and offset are in code units, not characters. +That is, they are in bytes for the 8-bit library, 16-bit code units for the +16-bit library, and 32-bit code units for the 32-bit library, whether or not +UTF processing is enabled. As a special case, if subject is NULL and +length is zero, the subject is assumed to be an empty string. If +length is non-zero, an error occurs if subject is NULL. +

+

+If startoffset is greater than the length of the subject, +pcre2_match() returns PCRE2_ERROR_BADOFFSET. When the starting offset is +zero, the search for a match starts at the beginning of the subject, and this +is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset +must point to the start of a character, or to the end of the subject (in UTF-32 +mode, one code unit equals one character, so all offsets are valid). Like the +pattern string, the subject may contain binary zeros. +

+

+A non-zero starting offset is useful when searching for another match in the +same subject by calling pcre2_match() again after a previous success. +Setting startoffset differs from passing over a shortened string and +setting PCRE2_NOTBOL in the case of a pattern that begins with any kind of +lookbehind. For example, consider the pattern +

+  \Biss\B
+
+which finds occurrences of "iss" in the middle of words. (\B matches only if +the current position in the subject is not a word boundary.) When applied to +the string "Mississippi" the first call to pcre2_match() finds the first +occurrence. If pcre2_match() is called again with just the remainder of +the subject, namely "issippi", it does not match, because \B is always false +at the start of the subject, which is deemed to be a word boundary. However, if +pcre2_match() is passed the entire string again, but with +startoffset set to 4, it finds the second occurrence of "iss" because it +is able to look behind the starting point to discover that it is preceded by a +letter. +

+

+Finding all the matches in a subject is tricky when the pattern can match an +empty string. It is possible to emulate Perl's /g behaviour by first trying the +match again at the same offset, with the PCRE2_NOTEMPTY_ATSTART and +PCRE2_ANCHORED options, and then if that fails, advancing the starting offset +and trying an ordinary match again. There is some code that demonstrates how to +do this in the +pcre2demo +sample program. In the most general case, you have to check to see if the +newline convention recognizes CRLF as a newline, and if so, and the current +character is CR followed by LF, advance the starting offset by two characters +instead of one. +

+

+If a non-zero starting offset is passed when the pattern is anchored, a single +attempt to match at the given offset is made. This can only succeed if the +pattern does not require the match to be at the start of the subject. In other +words, the anchoring must be the result of setting the PCRE2_ANCHORED option or +the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \A. +

+
+Option bits for pcre2_match() +
+

+The unused bits of the options argument for pcre2_match() must be +zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_ENDANCHORED, +PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, +PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. +Their action is described below. +

+

+Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by +the just-in-time (JIT) compiler. If either is set, JIT matching is disabled and the +interpretive code in pcre2_match() is run. +PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from PCRE2_NO_JIT +(obviously), the remaining options are supported for JIT matching. +

+  PCRE2_ANCHORED
+
+The PCRE2_ANCHORED option limits pcre2_match() to matching at the first +matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out +to be anchored by virtue of its contents, it cannot be made unanchored at +matching time. Note that setting the option at match time disables JIT +matching. +
+  PCRE2_COPY_MATCHED_SUBJECT
+
+By default, a pointer to the subject is remembered in the match data block so +that, after a successful match, it can be referenced by the substring +extraction functions. This means that the subject's memory must not be freed +until all such operations are complete. For some applications where the +lifetime of the subject string is not guaranteed, it may be necessary to make a +copy of the subject string, but it is wasteful to do this unless the match is +successful. After a successful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the +subject is copied and the new pointer is remembered in the match data block +instead of the original subject pointer. The memory allocator that was used for +the match block itself is used. The copy is automatically freed when +pcre2_match_data_free() is called to free the match data block. It is also +automatically freed if the match data block is re-used for another match +operation. +
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+
+This option is relevant only to pcre2_match() for interpretive matching. +It is ignored when JIT is used, and is forbidden for pcre2_dfa_match(). +

+

+The use of recursion in patterns can lead to infinite loops. In the +interpretive matcher these would be eventually caught by the match or heap +limits, but this could take a long time and/or use a lot of memory if the +limits are large. There is therefore a check at the start of each recursion. +If the same group is still active from a previous call, and the current subject +pointer is the same as it was at the start of that group, and the furthest +inspected character of the subject has not changed, an error is generated. +

+

+There are rare cases of matches that would complete, but nevertheless trigger +this error. This option disables the check. It is provided mainly for testing +when comparing JIT and interpretive behaviour. +

+  PCRE2_ENDANCHORED
+
+If the PCRE2_ENDANCHORED option is set, any string that pcre2_match() +matches must be right at the end of the subject string. Note that setting the +option at match time disables JIT matching. +
+  PCRE2_NOTBOL
+
+This option specifies that the first character of the subject string is not the +beginning of a line, so the circumflex metacharacter should not match before +it. Setting this without having set PCRE2_MULTILINE at compile time causes +circumflex never to match. This option affects only the behaviour of the +circumflex metacharacter. It does not affect \A. +
+  PCRE2_NOTEOL
+
+This option specifies that the end of the subject string is not the end of a +line, so the dollar metacharacter should not match it nor (except in multiline +mode) a newline immediately before it. Setting this without having set +PCRE2_MULTILINE at compile time causes dollar never to match. This option +affects only the behaviour of the dollar metacharacter. It does not affect \Z +or \z. +
+  PCRE2_NOTEMPTY
+
+An empty string is not considered to be a valid match if this option is set. If +there are alternatives in the pattern, they are tried. If all the alternatives +match the empty string, the entire match fails. For example, if the pattern +
+  a?b?
+
+is applied to a string not beginning with "a" or "b", it matches an empty +string at the start of the subject. With PCRE2_NOTEMPTY set, this match is not +valid, so pcre2_match() searches further into the string for occurrences +of "a" or "b". +
+  PCRE2_NOTEMPTY_ATSTART
+
+This is like PCRE2_NOTEMPTY, except that it locks out an empty string match +only at the first matching position, that is, at the start of the subject plus +the starting offset. An empty string match later in the subject is permitted. +If the pattern is anchored, such a match can occur only if the pattern contains +\K. +
+  PCRE2_NO_JIT
+
+By default, if a pattern has been successfully processed by +pcre2_jit_compile(), JIT is automatically used when pcre2_match() +is called with options that JIT supports. Setting PCRE2_NO_JIT disables the use +of JIT; it forces matching to be done by the interpreter. +
+  PCRE2_NO_UTF_CHECK
+
+When PCRE2_UTF is set at compile time, the validity of the subject as a UTF +string is checked unless PCRE2_NO_UTF_CHECK is passed to pcre2_match() or +PCRE2_MATCH_INVALID_UTF was passed to pcre2_compile(). The latter special +case is discussed in detail in the +pcre2unicode +documentation. +

+

+In the default case, if a non-zero starting offset is given, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \b and \B are +one-character lookbehinds. +

+

+The check is carried out before any other processing takes place, and a +negative error code is returned if the check fails. There are several UTF error +codes for each code unit width, corresponding to different problems with the +code unit sequence. There are discussions about the validity of +UTF-8 strings, +UTF-16 strings, +and +UTF-32 strings +in the +pcre2unicode +documentation. +

+

+If you know that your subject is valid, and you want to skip this check for +performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling +pcre2_match(). You might want to do this for the second and subsequent +calls to pcre2_match() if you are making repeated calls to find multiple +matches in the same subject string. +

+

+Warning: Unless PCRE2_MATCH_INVALID_UTF was set at compile time, when +PCRE2_NO_UTF_CHECK is set at match time the effect of passing an invalid +string as a subject, or an invalid value of startoffset, is undefined. +Your program may crash or loop indefinitely or give wrong results. +

+  PCRE2_PARTIAL_HARD
+  PCRE2_PARTIAL_SOFT
+
+These options turn on the partial matching feature. A partial match occurs if +the end of the subject string is reached successfully, but there are not enough +subject characters to complete the match. In addition, either at least one +character must have been inspected or the pattern must contain a lookbehind, or +the pattern must be one that could match an empty string. +

+

+If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) +is set, matching continues by testing any remaining alternatives. Only if no +complete match can be found is PCRE2_ERROR_PARTIAL returned instead of +PCRE2_ERROR_NOMATCH. In other words, PCRE2_PARTIAL_SOFT specifies that the +caller is prepared to handle a partial match, but only if no complete match can +be found. +

+

+If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this case, if +a partial match is found, pcre2_match() immediately returns +PCRE2_ERROR_PARTIAL, without considering any other alternatives. In other +words, when PCRE2_PARTIAL_HARD is set, a partial match is considered to be more +important than an alternative complete match. +

+

+There is a more detailed discussion of partial and multi-segment matching, with +examples, in the +pcre2partial +documentation. +

+
NEWLINE HANDLING WHEN MATCHING
+

+When PCRE2 is built, a default newline convention is set; this is usually the +standard convention for the operating system. The default can be overridden in +a +compile context +by calling pcre2_set_newline(). It can also be overridden by starting a +pattern string with, for example, (*CRLF), as described in the +section on newline conventions +in the +pcre2pattern +page. During matching, the newline choice affects the behaviour of the dot, +circumflex, and dollar metacharacters. It may also alter the way the match +starting position is advanced after a match failure for an unanchored pattern. +

+

+When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is set as +the newline convention, and a match attempt for an unanchored pattern fails +when the current starting position is at a CRLF sequence, and the pattern +contains no explicit matches for CR or LF characters, the match position is +advanced by two characters instead of one, in other words, to after the CRLF. +

+

+The above rule is a compromise that makes the most common cases work as +expected. For example, if the pattern is .+A (and the PCRE2_DOTALL option is +not set), it does not match the string "\r\nA" because, after failing at the +start, it skips both the CR and the LF before retrying. However, the pattern +[\r\n]A does match that string, because it contains an explicit CR or LF +reference, and so advances only by one character after the first failure. +

+

+An explicit match for CR or LF is either a literal appearance of one of those +characters in the pattern, or one of the \r or \n or equivalent octal or +hexadecimal escape sequences. Implicit matches such as [^X] do not count, nor +does \s, even though it includes CR and LF in the characters that it matches. +

+

+Notwithstanding the above, anomalous effects may still occur when CRLF is a +valid newline sequence and explicit \r or \n escapes appear in the pattern. +

+
HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
+

+uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); +
+
+PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); +

+

+In general, a pattern matches a certain portion of the subject, and in +addition, further substrings from the subject may be picked out by +parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's +book, this is called "capturing" in what follows, and the phrase "capture +group" (Perl terminology) is used for a fragment of a pattern that picks out a +substring. PCRE2 supports several other kinds of parenthesized group that do +not cause substrings to be captured. The pcre2_pattern_info() function +can be used to find out how many capture groups there are in a compiled +pattern. +

+

+You can use auxiliary functions for accessing captured substrings +by number +or +by name, +as described in sections below. +

+

+Alternatively, you can make direct use of the vector of PCRE2_SIZE values, +called the ovector, which contains the offsets of captured strings. It is +part of the +match data block. +The function pcre2_get_ovector_pointer() returns the address of the +ovector, and pcre2_get_ovector_count() returns the number of pairs of +values it contains. +

+

+Within the ovector, the first in each pair of values is set to the offset of +the first code unit of a substring, and the second is set to the offset of the +first code unit after the end of a substring. These values are always code unit +offsets, not character offsets. That is, they are byte offsets in the 8-bit +library, 16-bit offsets in the 16-bit library, and 32-bit offsets in the 32-bit +library. +

+

+After a partial match (error return PCRE2_ERROR_PARTIAL), only the first pair +of offsets (that is, ovector[0] and ovector[1]) are set. They +identify the part of the subject that was partially matched. See the +pcre2partial +documentation for details of partial matching. +

+

+After a fully successful match, the first pair of offsets identifies the +portion of the subject string that was matched by the entire pattern. The next +pair is used for the first captured substring, and so on. The value returned by +pcre2_match() is one more than the highest numbered pair that has been +set. For example, if two substrings have been captured, the returned value is +3. If there are no captured substrings, the return value from a successful +match is 1, indicating that just the first pair of offsets has been set. +

+

+If a pattern uses the \K escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\K) is matched against "ab", the start and +end offset values for the match are 2 and 0. +

+

+If a capture group is matched repeatedly within a single match operation, it is +the last portion of the subject that it matched that is returned. +

+

+If the ovector is too small to hold all the captured substring offsets, as much +as possible is filled in, and the function returns a value of zero. If captured +substrings are not of interest, pcre2_match() may be called with a match +data block whose ovector is of minimum length (that is, one pair). +

+

+It is possible for capture group number n+1 to match some part of the +subject when group n has not been used at all. For example, if the string +"abc" is matched against the pattern (a|(z))(bc) the return from the function +is 4, and groups 1 and 3 are matched, but 2 is not. When this happens, both +values in the offset pairs corresponding to unused groups are set to +PCRE2_UNSET. +

+

+Offset values that correspond to unused groups at the end of the expression are +also set to PCRE2_UNSET. For example, if the string "abc" is matched against +the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the +function is 2, because the highest used capture group number is 1. The offsets +for the second and third capture groups (assuming the vector is large enough, +of course) are set to PCRE2_UNSET. +

+

+Elements in the ovector that do not correspond to capturing parentheses in the +pattern are never changed. That is, if a pattern contains n capturing +parentheses, no more than ovector[0] to ovector[2n+1] are set by +pcre2_match(). The other elements retain whatever values they previously +had. After a failed match attempt, the contents of the ovector are unchanged. +

+
OTHER INFORMATION ABOUT A MATCH
+

+PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); +
+
+PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); +

+

+As well as the offsets in the ovector, other information about a match is +retained in the match data block and can be retrieved by the above functions in +appropriate circumstances. If they are called at other times, the result is +undefined. +

+

+After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure +to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function +pcre2_get_mark() can be called to access this name, which can be +specified in the pattern by any of the backtracking control verbs, not just +(*MARK). The same function applies to all the verbs. It returns a pointer to +the zero-terminated name, which is within the compiled pattern. If no name is +available, NULL is returned. The length of the name (excluding the terminating +zero) is stored in the code unit that precedes the name. You should use this +length instead of relying on the terminating zero if the name might contain a +binary zero. +

+

+After a successful match, the name that is returned is the last mark name +encountered on the matching path through the pattern. Instances of backtracking +verbs without names do not count. Thus, for example, if the matching path +contains (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a +partial match, the last encountered name is returned. For example, consider +this pattern: +

+  ^(*MARK:A)((*MARK:B)a|b)c
+
+When it matches "bc", the returned name is A. The B mark is "seen" in the first +branch of the group, but it is not on the matching path. On the other hand, +when this pattern fails to match "bx", the returned name is B. +

+

+Warning: By default, certain start-of-match optimizations are used to +give a fast "no match" result in some situations. For example, if the anchoring +is removed from the pattern above, there is an initial check for the presence +of "c" in the subject before running the matching engine. This check fails for +"bx", causing a match failure without seeing any marks. You can disable the +start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for +pcre2_compile() or by starting the pattern with (*NO_START_OPT). +

+

+After a successful match, a partial match, or one of the invalid UTF errors +(for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can be +called. After a successful or partial match it returns the code unit offset of +the character at which the match started. For a non-partial match, this can be +different to the value of ovector[0] if the pattern contains the \K +escape sequence. After a partial match, however, this value is always the same +as ovector[0] because \K does not affect the result of a partial match. +

+

+After a UTF check failure, pcre2_get_startchar() can be used to obtain +the code unit offset of the invalid UTF character. Details are given in the +pcre2unicode +page. +

+
ERROR RETURNS FROM pcre2_match()
+

+If pcre2_match() fails, it returns a negative number. This can be +converted to a text string by calling the pcre2_get_error_message() +function (see "Obtaining a textual error message" +below). +Negative error codes are also returned by other functions, and are documented +with them. The codes are given names in the header file. If UTF checking is in +force and an invalid UTF subject string is detected, one of a number of +UTF-specific negative error codes is returned. Details are given in the +pcre2unicode +page. The following are the other errors that may be returned by +pcre2_match(): +

+  PCRE2_ERROR_NOMATCH
+
+The subject string did not match the pattern. +
+  PCRE2_ERROR_PARTIAL
+
+The subject string did not match, but it did match partially. See the +pcre2partial +documentation for details of partial matching. +
+  PCRE2_ERROR_BADMAGIC
+
+PCRE2 stores a 4-byte "magic number" at the start of the compiled code, to +catch the case when it is passed a junk pointer. This is the error that is +returned when the magic number is not present. +
+  PCRE2_ERROR_BADMODE
+
+This error is given when a compiled pattern is passed to a function in a +library of a different code unit width, for example, a pattern compiled by +the 8-bit library is passed to a 16-bit or 32-bit library function. +
+  PCRE2_ERROR_BADOFFSET
+
+The value of startoffset was greater than the length of the subject. +
+  PCRE2_ERROR_BADOPTION
+
+An unrecognized bit was set in the options argument. +
+  PCRE2_ERROR_BADUTFOFFSET
+
+The UTF code unit sequence that was passed as a subject was checked and found +to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the value of +startoffset did not point to the beginning of a UTF character or the end +of the subject. +
+  PCRE2_ERROR_CALLOUT
+
+This error is never generated by pcre2_match() itself. It is provided for +use by callout functions that want to cause pcre2_match() or +pcre2_callout_enumerate() to return a distinctive error code. See the +pcre2callout +documentation for details. +
+  PCRE2_ERROR_DEPTHLIMIT
+
+The nested backtracking depth limit was reached. +
+  PCRE2_ERROR_HEAPLIMIT
+
+The heap limit was reached. +
+  PCRE2_ERROR_INTERNAL
+
+An unexpected internal error has occurred. This error could be caused by a bug +in PCRE2 or by overwriting of the compiled pattern. +
+  PCRE2_ERROR_JIT_STACKLIMIT
+
+This error is returned when a pattern that was successfully studied using JIT +is being matched, but the memory available for the just-in-time processing +stack is not large enough. See the +pcre2jit +documentation for more details. +
+  PCRE2_ERROR_MATCHLIMIT
+
+The backtracking match limit was reached. +
+  PCRE2_ERROR_NOMEMORY
+
+Heap memory is used to remember backtracking points. This error is given when +the memory allocation function (default or custom) fails. Note that a different +error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds +the heap limit. PCRE2_ERROR_NOMEMORY is also returned if +PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. +
+  PCRE2_ERROR_NULL
+
+Either the code, subject, or match_data argument was passed +as NULL. +
+  PCRE2_ERROR_RECURSELOOP
+
+This error is returned when pcre2_match() detects a recursion loop within +the pattern. Specifically, it means that either the whole pattern or a +capture group has been called recursively for the second time at the same +position in the subject string. Some simple patterns that might do this are +detected and faulted at compile time, but more complicated cases, in particular +mutual recursions between two different groups, cannot be detected until +matching is attempted. +

+
OBTAINING A TEXTUAL ERROR MESSAGE
+

+int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); +

+

+A text message for an error code from any PCRE2 function (compile, match, or +auxiliary) can be obtained by calling pcre2_get_error_message(). The code +is passed as the first argument, with the remaining two arguments specifying a +code unit buffer and its length in code units, into which the text message is +placed. The message is returned in code units of the appropriate width for the +library that is being used. +

+

+The returned message is terminated with a trailing zero, and the function +returns the number of code units used, excluding the trailing zero. If the +error number is unknown, the negative error code PCRE2_ERROR_BADDATA is +returned. If the buffer is too small, the message is truncated (but still with +a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned. +None of the messages are very long; a buffer size of 120 code units is ample. +

+
EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
+

+int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); +
+
+int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); +
+
+void pcre2_substring_free(PCRE2_UCHAR *buffer); +

+

+Captured substrings can be accessed directly by using the ovector as described +above. +For convenience, auxiliary functions are provided for extracting captured +substrings as new, separate, zero-terminated strings. A substring that contains +a binary zero is correctly extracted and has a further zero added on the end, +but the result is not, of course, a C string. +

+

+The functions in this section identify substrings by number. The number zero +refers to the entire matched substring, with higher numbers referring to +substrings captured by parenthesized groups. After a partial match, only +substring zero is available. An attempt to extract any other substring gives +the error PCRE2_ERROR_PARTIAL. The next section describes similar functions for +extracting captured substrings by name. +

+

+If a pattern uses the \K escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\K) is matched against "ab", the start and +end offset values for the match are 2 and 0. In this situation, calling these +functions with a zero substring number extracts a zero-length empty string. +

+

+You can find the length in code units of a captured substring without +extracting it by calling pcre2_substring_length_bynumber(). The first +argument is a pointer to the match data block, the second is the group number, +and the third is a pointer to a variable into which the length is placed. If +you just want to know whether or not the substring has been captured, you can +pass the third argument as NULL. +

+

+The pcre2_substring_copy_bynumber() function copies a captured substring +into a supplied buffer, whereas pcre2_substring_get_bynumber() copies it +into new memory, obtained using the same memory allocation function that was +used for the match data block. The first two arguments of these functions are a +pointer to the match data block and a capture group number. +

+

+The final arguments of pcre2_substring_copy_bynumber() are a pointer to +the buffer and a pointer to a variable that contains its length in code units. +This is updated to contain the actual number of code units used for the +extracted substring, excluding the terminating zero. +

+

+For pcre2_substring_get_bynumber() the third and fourth arguments point +to variables that are updated with a pointer to the new memory and the number +of code units that comprise the substring, again excluding the terminating +zero. When the substring is no longer needed, the memory should be freed by +calling pcre2_substring_free(). +

+

+The return value from all these functions is zero for success, or a negative +error code. If the pattern match failed, the match failure code is returned. +If a substring number greater than zero is used after a partial match, +PCRE2_ERROR_PARTIAL is returned. Other possible error codes are: +

+  PCRE2_ERROR_NOMEMORY
+
+The buffer was too small for pcre2_substring_copy_bynumber(), or the +attempt to get memory failed for pcre2_substring_get_bynumber(). +
+  PCRE2_ERROR_NOSUBSTRING
+
+There is no substring with that number in the pattern, that is, the number is +greater than the number of capturing parentheses. +
+  PCRE2_ERROR_UNAVAILABLE
+
+The substring number, though not greater than the number of captures in the +pattern, is greater than the number of slots in the ovector, so the substring +could not be captured. +
+  PCRE2_ERROR_UNSET
+
+The substring did not participate in the match. For example, if the pattern is +(abc)|(def) and the subject is "def", and the ovector contains at least two +capturing slots, substring number 1 is unset. +

+
EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
+

+int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); +
+
+void pcre2_substring_list_free(PCRE2_UCHAR **list); +

+

+The pcre2_substring_list_get() function extracts all available substrings +and builds a list of pointers to them. It also (optionally) builds a second +list that contains their lengths (in code units), excluding a terminating zero +that is added to each of them. All this is done in a single block of memory +that is obtained using the same memory allocation function that was used to get +the match data block. +

+

+This function must be called only after a successful match. If called after a +partial match, the error code PCRE2_ERROR_PARTIAL is returned. +

+

+The address of the memory block is returned via listptr, which is also +the start of the list of string pointers. The end of the list is marked by a +NULL pointer. The address of the list of lengths is returned via +lengthsptr. If your strings do not contain binary zeros and you do not +therefore need the lengths, you may supply NULL as the lengthsptr +argument to disable the creation of a list of lengths. The yield of the +function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block +could not be obtained. When the list is no longer needed, it should be freed by +calling pcre2_substring_list_free(). +

+

+If this function encounters a substring that is unset, which can happen when +capture group number n+1 matches some part of the subject, but group +n has not been used at all, it returns an empty string. This can be +distinguished from a genuine zero-length substring by inspecting the +appropriate offset in the ovector, which contains PCRE2_UNSET for unset +substrings, or by calling pcre2_substring_length_bynumber(). +

+
EXTRACTING CAPTURED SUBSTRINGS BY NAME
+

+int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); +
+
+int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); +
+
+int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); +
+
+int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); +
+
+void pcre2_substring_free(PCRE2_UCHAR *buffer); +

+

+To extract a substring by name, you first have to find the associated number. +For example, for this pattern: +

+  (a+)b(?<xxx>\d+)...
+
+the number of the capture group called "xxx" is 2. If the name is known to be +unique (PCRE2_DUPNAMES was not set), you can find the number from the name by +calling pcre2_substring_number_from_name(). The first argument is the +compiled pattern, and the second is the name. The yield of the function is the +group number, PCRE2_ERROR_NOSUBSTRING if there is no group with that name, or +PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one group with that name. +Given the number, you can extract the substring directly from the ovector, or +use one of the "bynumber" functions described above. +

+

+For convenience, there are also "byname" functions that correspond to the +"bynumber" functions, the only difference being that the second argument is a +name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate +names, these functions scan all the groups with the given name, and return the +captured substring from the first named group that is set. +

+

+If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is +returned. If all groups with the name have numbers that are greater than the +number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there +is at least one group with a slot in the ovector, but no group is found to be +set, PCRE2_ERROR_UNSET is returned. +

+

+Warning: If the pattern uses the (?| feature to set up multiple +capture groups with the same number, as described in the +section on duplicate group numbers +in the +pcre2pattern +page, you cannot use names to distinguish the different capture groups, because +names are not included in the compiled code. The matching process uses only +numbers. For this reason, the use of different names for groups with the +same number causes an error at compile time. +

+
CREATING A NEW STRING WITH SUBSTITUTIONS
+

+int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); +

+

+This function optionally calls pcre2_match() and then makes a copy of the +subject string in outputbuffer, replacing parts that were matched with +the replacement string, whose length is supplied in rlength, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if replacement is NULL and rlength is zero, the +replacement is assumed to be an empty string. If rlength is non-zero, an +error occurs if replacement is NULL. +

+

+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). +

+

+If successful, pcre2_substitute() returns the number of substitutions +that were carried out. This may be zero if no match was found, and is never +greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is +returned if an error is detected. +

+

+Matches in which a \K item in a lookahead in the pattern causes the match to +end before it starts are not supported, and give rise to an error return. For +global replacements, matches in which \K in a lookbehind causes the match to +start earlier than the point that was reached in the previous iteration are +also not supported. +

+

+The first seven arguments of pcre2_substitute() are the same as for +pcre2_match(), except that the partial matching options are not +permitted, and match_data may be passed as NULL, in which case a match +data block is obtained and freed within this function, using memory management +functions from the match context, if provided, or else those that were used to +allocate memory for the compiled code. +

+

+If match_data is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the +provided block is used for all calls to pcre2_match(), and its contents +afterwards are the result of the final call. For global changes, this will +always be a no-match error. The contents of the ovector within the match data +block may or may not have been changed. +

+

+As well as the usual options for pcre2_match(), a number of additional +options can be set in the options argument of pcre2_substitute(). +One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external +match_data block must be provided, and it must have already been used for +an external call to pcre2_match() with the same pattern and subject +arguments. The data in the match_data block (return code, offset vector) +is then used for the first substitution instead of calling pcre2_match() +from within pcre2_substitute(). This allows an application to check for a +match before choosing to substitute, without having to repeat the match. +

+

+The contents of the externally supplied match data block are not changed when +PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set, +pcre2_match() is called after the first substitution to check for further +matches, but this is done using an internally obtained match data block, thus +always leaving the external block unchanged. +

+

+The code argument is not used for matching before the first substitution +when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when +PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the +UTF setting and the number of capturing parentheses in the pattern. +

+

+The default action of pcre2_substitute() is to return a copy of the +subject string with matched substrings replaced. However, if +PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are +returned. In the global case, multiple replacements are concatenated in the +output buffer. Substitution callouts (see +below) +can be used to separate them if necessary. +

+

+The outlengthptr argument of pcre2_substitute() must point to a +variable that contains the length, in code units, of the output buffer. If the +function is successful, the value is updated to contain the length in code +units of the new string, excluding the trailing zero that is automatically +added. +

+

+If the function is not successful, the value set via outlengthptr depends +on the type of error. For syntax errors in the replacement string, the value is +the offset in the replacement string where the error was detected. For other +errors, the value is PCRE2_UNSET by default. This includes the case of the +output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. +

+

+PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is +too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If +this option is set, however, pcre2_substitute() continues to go through +the motions of matching and substituting (without, of course, writing anything) +in order to compute the size of buffer that is needed, which will include the +extra space for the terminating NUL. This value is passed back via the +outlengthptr variable, with the result of the function still being +PCRE2_ERROR_NOMEMORY. +

+

+Passing a buffer size of zero is a permitted way of finding out how much memory +is needed for a given substitution. However, this does mean that the entire +operation is carried out twice. Depending on the application, it may be more +efficient to allocate a large buffer and free the excess afterwards, instead of +using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. +

+

+The replacement string, which is interpreted as a UTF string in UTF mode, is +checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF +replacement string causes an immediate return with the relevant UTF error code. +

+

+If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted +in any way. By default, however, a dollar character is an escape character that +can specify the insertion of characters from capture groups and names from +(*MARK) or other control verbs in the pattern. Dollar is the only escape +character (backslash is treated as literal). The following forms are +recognized: +

+  $$                  insert a dollar character
+  $n or ${n}          insert the contents of group n
+  $0 or $&            insert the entire matched substring
+  $`                  insert the substring that precedes the match
+  $'                  insert the substring that follows the match
+  $_                  insert the entire input string
+  $*MARK or ${*MARK}  insert a control verb name
+
+Either a group number or a group name can be given for n, for example $2 or +$NAME. Curly brackets are required only if the following character would be +interpreted as part of the number or name. The number may be zero to include +the entire matched string. For example, if the pattern a(b)c is matched with +"=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=". +

+

+The JavaScript form $<name>, where the angle brackets are part of the syntax, +is also recognized for group names, but not for group numbers or *MARK. +

+

+$*MARK inserts the name from the last encountered backtracking control verb on +the matching path that has a name. (*MARK) must always include a name, but the +other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name +inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B". This +facility can be used to perform simple simultaneous substitutions, as this +pcre2test example shows: +

+  /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
+      apple lemon
+   2: pear orange
+
+PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string, +replacing every matching substring. If this option is not set, only the first +matching substring is replaced. The search for matches takes place in the +original subject string (that is, previous replacements do not affect it). +Iteration is implemented by advancing the startoffset value for each +search, which is always passed the entire subject string. If an offset limit is +set in the match context, searching stops when that limit is reached. +

+

+You can restrict the effect of a global substitution to a portion of the +subject string by setting either or both of startoffset and an offset +limit. Here is a pcre2test example: +

+  /B/g,replace=!,use_offset_limit
+  ABC ABC ABC ABC\=offset=3,offset_limit=12
+   2: ABC A!C A!C ABC
+
+When continuing with global substitutions after matching a substring with zero +length, an attempt to find a non-empty match at the same offset is performed. +If this is not successful, the offset is advanced by one character except when +CRLF is a valid newline sequence and the next two characters are CR, LF. In +this case, the offset is advanced by two characters. +

+

+PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do +not appear in the pattern to be treated as unset groups. This option should be +used with care, because it means that a typo in a group name or number no +longer causes the PCRE2_ERROR_NOSUBSTRING error. +

+

+PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including unknown +groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty +strings when inserted as described above. If this option is not set, an attempt +to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does +not influence the extended substitution syntax described below. +

+

+PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the +replacement string. Without this option, only the dollar character is special, +and only the group insertion forms listed above are valid. When +PCRE2_SUBSTITUTE_EXTENDED is set, several things change: +

+

+Firstly, backslash in a replacement string is interpreted as an escape +character. The usual forms such as \x{ddd} can be used to specify particular +character codes, and backslash followed by any non-alphanumeric character +quotes that character. Extended quoting can be coded using \Q...\E, exactly +as in pattern strings. The escapes \b and \v are interpreted as the +characters backspace and vertical tab, respectively. +

+

+The interpretation of backslash followed by one or more digits is the same as +in a pattern, which in Perl has some ambiguities. Details are given in the +pcre2pattern +page. +

+

+The Python form \g<n>, where the angle brackets are part of the syntax and n +is either a group name or number, is recognized as an alternative way of +inserting the contents of a group, for example \g<3>. +

+

+There are also four escape sequences for forcing the case of inserted letters. +Case forcing applies to all inserted characters, including those from capture +groups and letters within \Q...\E quoted sequences. The insertion mechanism +has three states: no case forcing, force upper case, and force lower case. The +escape sequences change the current state: \U and \L change to upper or lower +case forcing, respectively, and \E (when not terminating a \Q quoted +sequence) reverts to no case forcing. The sequences \u and \l force the next +character (if it is a letter) to upper or lower case, respectively, and then +the state automatically reverts to no case forcing. +

+

+However, if \u is immediately followed by \L or \l is immediately followed +by \U, the next character's case is forced by the first escape sequence, and +subsequent characters by the second. This provides a "title casing" facility +that can be applied to group captures. For example, if group 1 has captured +"heLLo", the replacement string "\u\L$1" becomes "Hello". +

+

+If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode +properties are used for case forcing characters whose code points are greater +than 127. However, only simple case folding, as determined by the Unicode file +CaseFolding.txt, is supported. PCRE2 does not support language-specific +special casing rules such as using different lower case Greek sigmas in the +middle and ends of words (as defined in the Unicode file +SpecialCasing.txt). +

+

+Note that case forcing sequences such as \U...\E do not nest. For example, +the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no +effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do +not apply to replacement strings. +

+

+The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +flexibility to capture group substitution. The syntax is similar to that used +by Bash: +

+  ${n:-string}
+  ${n:+string1:string2}
+
+As in the simple case, n may be a group number or a name. The first form +specifies a default value. If group n is set, its value is inserted; if +not, the string is expanded and the result inserted. The second form specifies +strings that are expanded and inserted when group n is set or unset, +respectively. The first form is just a convenient shorthand for +
+  ${n:+${n}:string}
+
+Backslash can be used to escape colons and closing curly brackets in the +replacement strings. A change of the case forcing state within a replacement +string remains in force afterwards, as shown in this pcre2test example: +
+  /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
+      body
+   1: hello
+      somebody
+   1: HELLO
+
+The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended +substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown +groups in the extended syntax forms to be treated as unset. +

+

+If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, +PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and +are ignored. +

+
+Substitution errors +
+

+In the event of an error, pcre2_substitute() returns a negative error +code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from +pcre2_match() are passed straight back. +

+

+PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion, +unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. +

+

+PCRE2_ERROR_UNSET is returned for an unset substring insertion (including an +unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) when the simple +(non-extended) syntax is used and PCRE2_SUBSTITUTE_UNSET_EMPTY is not set. +

+

+PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is +needed is returned via outlengthptr. Note that this does not happen by +default. +

+

+PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the +match_data argument is NULL or if the subject or replacement +arguments are NULL. For backward compatibility reasons an exception is made for +the replacement argument if the rlength argument is also 0. +

+

+PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the +replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE +(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket +not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group +substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before +it started or the match started earlier than the current position in the +subject, which can happen if \K is used in an assertion). +

+

+As for all PCRE2 errors, a text message that describes the error can be +obtained by calling the pcre2_get_error_message() function (see +"Obtaining a textual error message" +above). +

+
+Substitution callouts +
+

+int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); +
+
+The pcre2_set_substitute_callout() function can be used to specify a +callout function for pcre2_substitute(). This information is passed in +a match context. The callout function is called after each substitution has +been processed, but it can cause the replacement not to happen. +

+

+The callout function is not called for simulated substitutions that happen as a +result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when +substitution processing exceeds the buffer space provided by the caller, +processing continues by counting code units. The simulation is unable to +populate the callout block, and so the simulation is pessimistic about the +required buffer size. Whichever is larger of accepted or rejected substitution +is reported as the required size. Therefore, the returned buffer length may be +an overestimate (without a substitution callout, it is normally an exact +measurement). +

+

+The first argument of the callout function is a pointer to a substitute callout +block structure, which contains the following fields, not necessarily in this +order: +

+  uint32_t    version;
+  uint32_t    subscount;
+  PCRE2_SPTR  input;
+  PCRE2_SPTR  output;
+  PCRE2_SIZE *ovector;
+  uint32_t    oveccount;
+  PCRE2_SIZE  output_offsets[2];
+
+The version field contains the version number of the block format. The +current version is 0. The version number will increase in future if more fields +are added, but the intention is never to remove any of the existing fields. +

+

+The subscount field is the number of the current match. It is 1 for the +first callout, 2 for the second, and so on. The input and output +pointers are copies of the values passed to pcre2_substitute(). +

+

+The ovector field points to the ovector, which contains the result of the +most recent match. The oveccount field contains the number of pairs that +are set in the ovector, and is always greater than zero. +

+

+The output_offsets vector contains the offsets of the replacement in the +output string. This has already been processed for dollar and (if requested) +backslash substitutions as described above. +

+

+The second argument of the callout function is the value passed as +callout_data when the function was registered. The value returned by the +callout function is interpreted as follows: +

+

+If the value is zero, the replacement is accepted, and, if +PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next +match. If the value is not zero, the current replacement is not accepted. If +the value is greater than zero, processing continues when +PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero or +PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the +output and the call to pcre2_substitute() exits, returning the number of +matches so far. +

+
+Substitution case callouts +
+

+int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); +
+
+The pcre2_set_substitute_case_callout() function can be used to specify +a callout function for pcre2_substitute() to use when performing case +transformations. This does not affect any case insensitivity behaviour when +performing a match, but only the user-visible transformations performed when +processing a substitution such as: +

+    pcre2_substitute(..., "\\U$1", ...)
+
+

+

+The default case transformations applied by PCRE2 are reasonably complete, and, +in UTF or UCP mode, perform the simple locale-invariant case transformations as +specified by Unicode. This is suitable for the internal (invisible) +case-equivalence procedures used during pattern matching, but an application +may wish to use more sophisticated locale-aware processing for the user-visible +substitution transformations. +

+

+One example implementation of the callout_function using the ICU +library would be: +
+
+

+    PCRE2_SIZE
+    icu_case_callout(
+      PCRE2_SPTR input, PCRE2_SIZE input_len,
+      PCRE2_UCHAR *output, PCRE2_SIZE output_cap,
+      int to_case, void *data_ptr)
+    {
+      UErrorCode err = U_ZERO_ERROR;
+      int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER
+        ? u_strToLower(output, output_cap, input, input_len, NULL, &err)
+        : to_case == PCRE2_SUBSTITUTE_CASE_UPPER
+        ? u_strToUpper(output, output_cap, input, input_len, NULL, &err)
+        : u_strToTitle(output, output_cap, input, input_len, &first_char_only,
+                       NULL, &err);
+      if (U_FAILURE(err)) return (~(PCRE2_SIZE)0);
+      return r;
+    }
+
+

+

+The first and second arguments of the case callout function are the Unicode +string to transform. +

+

+The third and fourth arguments are the output buffer and its capacity. +

+

+The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, +PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase +and the rest to Unicode lowercase (note that titlecasing sometimes uses Unicode +properties to titlecase each word in a string; but PCRE2 is requesting that only +the single leading character is to be titlecased). +

+

+The sixth argument is the callout_data supplied to +pcre2_set_substitute_case_callout(). +

+

+The resulting string in the destination buffer may be larger or smaller than the +input, if the casing rules merge or split characters. The return value is the +length required for the output string. If a buffer of sufficient size was +provided to the callout, then the result must be written to the buffer and the +number of code units returned. If the result does not fit in the provided +buffer, then the required capacity must be returned and PCRE2 will not make use +of the output buffer. PCRE2 provides input and output buffers which overlap, so +the callout must support this by suitable internal buffering. +

+

+Alternatively, if the callout wishes to indicate an error, then it may return +(~(PCRE2_SIZE)0). In this case pcre2_substitute() will immediately fail with +error PCRE2_ERROR_REPLACECASE. +

+

+When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH +option, there are situations when pcre2_substitute() will return an +underestimate of the required buffer size. If you call pcre2_substitute() once +with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the output buffer is too small for +the replacement string to be constructed, then instead of calling the case +callout, pcre2_substitute() will make an estimate of the required buffer size. +The second call should also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that +second call is not guaranteed to succeed either, if the case callout requires +more buffer space than expected. The caller must make repeated attempts in a +loop. +

+
DUPLICATE CAPTURE GROUP NAMES
+

+int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); +

+

+When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture +groups are not required to be unique. Duplicate names are always allowed for +groups with the same number, created by using the (?| feature. Indeed, if such +groups are named, they are required to use the same names. +

+

+Normally, patterns that use duplicate names are such that in any one match, +only one of each set of identically-named groups participates. An example is +shown in the +pcre2pattern +documentation. +

+

+When duplicates are present, pcre2_substring_copy_byname() and +pcre2_substring_get_byname() return the first substring corresponding to +the given name that is set. Only if none are set is PCRE2_ERROR_UNSET +returned. The pcre2_substring_number_from_name() function returns the +error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names. +

+

+If you want to get full details of all captured substrings for a given name, +you must use the pcre2_substring_nametable_scan() function. The first +argument is the compiled pattern, and the second is the name. If the third and +fourth arguments are NULL, the function returns a group number for a unique +name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. +

+

+When the third and fourth arguments are not NULL, they must be pointers to +variables that are updated by the function. After it has run, they point to the +first and last entries in the name-to-number table for the given name, and the +function returns the length of each entry in code units. In both cases, +PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. +

+

+The format of the name table is described +above +in the section entitled Information about a pattern. Given all the +relevant entries for the name, you can extract each of their numbers, and hence +the captured data. +

+
FINDING ALL POSSIBLE MATCHES AT ONE POSITION
+

+The traditional matching function uses a similar algorithm to Perl, which stops +when it finds the first match at a given point in the subject. If you want to +find all possible matches, or the longest possible match at a given position, +consider using the alternative matching function (see below) instead. If you +cannot use the alternative function, you can kludge it up by making use of the +callout facility, which is described in the +pcre2callout +documentation. +

+

+What you have to do is to insert a callout right at the end of the pattern. +When your callout function is called, extract and save the current matched +substring. Then return 1, which forces pcre2_match() to backtrack and try +other alternatives. Ultimately, when it runs out of matches, +pcre2_match() will yield PCRE2_ERROR_NOMATCH. +

+
MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
+

+int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); +

+

+The function pcre2_dfa_match() is called to match a subject string +against a compiled pattern, using a matching algorithm that scans the subject +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that pcre2_dfa_match() does +not support, see the +pcre2matching +documentation. +

+

+The arguments for the pcre2_dfa_match() function are the same as for +pcre2_match(), plus two extras. The ovector within the match data block +is used in a different way, and this is described below. The other common +arguments are used in the same way as for pcre2_match(), so their +description is not repeated here. +

+

+The two additional arguments provide workspace for the function. The workspace +vector should contain at least 20 elements. It is used for keeping track of +multiple paths through the pattern tree. More workspace is needed for patterns +and subjects where there are a lot of potential matches. +

+

+Here is an example of a simple call to pcre2_dfa_match(): +

+  int wspace[20];
+  pcre2_match_data *md = pcre2_match_data_create(4, NULL);
+  int rc = pcre2_dfa_match(
+    re,             /* result of pcre2_compile() */
+    "some string",  /* the subject string */
+    11,             /* the length of the subject string */
+    0,              /* start at offset 0 in the subject */
+    0,              /* default options */
+    md,             /* the match data block */
+    NULL,           /* a match context; NULL means use defaults */
+    wspace,         /* working space vector */
+    20);            /* number of elements (NOT size in bytes) */
+
+

+
+Option bits for pcre2_dfa_match() +
+

+The unused bits of the options argument for pcre2_dfa_match() must +be zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, +PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, +PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last +four of these are exactly the same as for pcre2_match(), so their +description is not repeated here. +

+  PCRE2_PARTIAL_HARD
+  PCRE2_PARTIAL_SOFT
+
+These have the same general effect as they do for pcre2_match(), but the +details are slightly different. When PCRE2_PARTIAL_HARD is set for +pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the +subject is reached and there is still at least one matching possibility that +requires additional characters. This happens even if some complete matches have +already been found. When PCRE2_PARTIAL_SOFT is set, the return code +PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL if the end of the +subject is reached, there have been no complete matches, but there is still at +least one matching possibility. The portion of the string that was inspected +when the longest partial match was found is set as the first matching string in +both cases. There is a more detailed discussion of partial and multi-segment +matching, with examples, in the +pcre2partial +documentation. +
+  PCRE2_DFA_SHORTEST
+
+Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to stop as +soon as it has found one match. Because of the way the alternative algorithm +works, this is necessarily the shortest possible match at the first possible +matching point in the subject string. +
+  PCRE2_DFA_RESTART
+
+When pcre2_dfa_match() returns a partial match, it is possible to call it +again, with additional subject characters, and have it continue with the same +match. The PCRE2_DFA_RESTART option requests this action; when it is set, the +workspace and wscount arguments must reference the same vector as +before because data about the match so far is left in them after a partial +match. There is more discussion of this facility in the +pcre2partial +documentation. +

+
+Successful returns from pcre2_dfa_match() +
+

+When pcre2_dfa_match() succeeds, it may have matched more than one +substring in the subject. Note, however, that all the matches from one run of +the function start at the same point in the subject. The shorter matches are +all initial substrings of the longer matches. For example, if the pattern +

+  <.*>
+
+is matched against the string +
+  This is <something> <something else> <something further> no more
+
+the three matched strings are +
+  <something> <something else> <something further>
+  <something> <something else>
+  <something>
+
+On success, the yield of the function is a number greater than zero, which is +the number of matched substrings. The offsets of the substrings are returned in +the ovector, and can be extracted by number in the same way as for +pcre2_match(), but the numbers bear no relation to any capture groups +that may exist in the pattern, because DFA matching does not support capturing. +

+

+Calls to the convenience functions that extract substrings by name +return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a +DFA match. The convenience functions that extract substrings by number never +return PCRE2_ERROR_NOSUBSTRING. +

+

+The matched strings are stored in the ovector in reverse order of length; that +is, the longest matching string is first. If there were too many matches to fit +into the ovector, the yield of the function is zero, and the vector is filled +with the longest matches. +

+

+NOTE: PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\d+" is compiled as if it were "a\d++". For DFA matching, this +means that only one possible match is found. If you really do want multiple +matches in such cases, either use an ungreedy repeat such as "a\d+?" or set +the PCRE2_NO_AUTO_POSSESS option when compiling. +

+
+Error returns from pcre2_dfa_match() +
+

+The pcre2_dfa_match() function returns a negative number when it fails. +Many of the errors are the same as for pcre2_match(), as described +above. +There are in addition the following errors that are specific to +pcre2_dfa_match(): +

+  PCRE2_ERROR_DFA_UITEM
+
+This return is given if pcre2_dfa_match() encounters an item in the +pattern that it does not support, for instance, the use of \C in a UTF mode or +a backreference. +
+  PCRE2_ERROR_DFA_UCOND
+
+This return is given if pcre2_dfa_match() encounters a condition item +that uses a backreference for the condition, or a test for recursion in a +specific capture group. These are not supported. +
+  PCRE2_ERROR_DFA_UINVALID_UTF
+
+This return is given if pcre2_dfa_match() is called for a pattern that +was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for DFA +matching. +
+  PCRE2_ERROR_DFA_WSSIZE
+
+This return is given if pcre2_dfa_match() runs out of space in the +workspace vector. +
+  PCRE2_ERROR_DFA_RECURSE
+
+When a recursion or subroutine call is processed, the matching function calls +itself recursively, using private memory for the ovector and workspace. +This error is given if the internal ovector is not large enough. This should be +extremely rare, as a vector of size 1000 is used. +
+  PCRE2_ERROR_DFA_BADRESTART
+
+When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option, +some plausibility checks are made on the contents of the workspace, which +should contain data about the previous partial match. If any of these checks +fail, this error is given. +

+
SEE ALSO
+

+pcre2build(3), pcre2callout(3), pcre2demo(3), +pcre2matching(3), pcre2partial(3), pcre2posix(3), +pcre2sample(3), pcre2unicode(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 26 December 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2build.html b/3rd/pcre2/doc/html/pcre2build.html new file mode 100644 index 00000000..f4e127f1 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2build.html @@ -0,0 +1,652 @@ + + +pcre2build specification + + +

pcre2build man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
BUILDING PCRE2
+

+PCRE2 is distributed with a configure script that can be used to build +the library in Unix-like environments using the applications known as +Autotools. Also in the distribution are files to support building using +CMake instead of configure. The text file +README +contains general information about building with Autotools (some of which is +repeated below), and also has some comments about building on various operating +systems. The files in the vms directory support building under OpenVMS. +There is a lot more information about building PCRE2 without using +Autotools (including information about using CMake and building "by +hand") in the text file called +NON-AUTOTOOLS-BUILD. +You should consult this file as well as the +README +file if you are building in a non-Unix-like environment. +

+
PCRE2 BUILD-TIME OPTIONS
+

+The rest of this document describes the optional features of PCRE2 that can be +selected when the library is compiled. It assumes use of the configure +script, where the optional features are selected or deselected by providing +options to configure before running the make command. However, the +same options can be selected in both Unix-like and non-Unix-like environments +if you are using CMake instead of configure to build PCRE2. +

+

+If you are not using Autotools or CMake, option selection can be done by +editing the config.h file, or by passing parameter settings to the +compiler, as described in +NON-AUTOTOOLS-BUILD. +

+

+The complete list of options for configure (which includes the standard +ones such as the selection of the installation directory) can be obtained by +running +

+  ./configure --help
+
+The following sections include descriptions of "on/off" options whose names +begin with --enable or --disable. Because of the way that configure +works, --enable and --disable always come in pairs, so the complementary option +always exists as well, but as it specifies the default, it is not described. +Options that specify values have names that start with --with. At the end of a +configure run, a summary of the configuration is output. +

+
BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES
+

+By default, a library called libpcre2-8 is built, containing functions +that take string arguments contained in arrays of bytes, interpreted either as +single-byte characters, or UTF-8 strings. You can also build two other +libraries, called libpcre2-16 and libpcre2-32, which process +strings that are contained in arrays of 16-bit and 32-bit code units, +respectively. These can be interpreted either as single-unit characters or +UTF-16/UTF-32 strings. To build these additional libraries, add one or both of +the following to the configure command: +

+  --enable-pcre2-16
+  --enable-pcre2-32
+
+If you do not want the 8-bit library, add +
+  --disable-pcre2-8
+
+as well. At least one of the three libraries must be built. Note that the POSIX +wrapper is for the 8-bit library only, and that pcre2grep is an 8-bit +program. Neither of these is built if you select only the 16-bit or 32-bit +libraries. +

+
BUILDING SHARED AND STATIC LIBRARIES
+

+The Autotools PCRE2 building process uses libtool to build both shared +and static libraries by default. You can suppress an unwanted library by adding +one of +

+  --disable-shared
+  --disable-static
+
+to the configure command. Setting --disable-shared ensures that PCRE2 +libraries are built as static libraries. The binaries that are then created as +part of the build process (for example, pcre2test and pcre2grep) +are linked statically with one or more PCRE2 libraries, but may also be +dynamically linked with other libraries such as libc. If you want these +binaries to be fully statically linked, you can set LDFLAGS like this: +
+
+LDFLAGS=--static ./configure --disable-shared +
+
+Note the two hyphens in --static. Of course, this works only if static versions +of all the relevant libraries are available for linking. +

+
UNICODE AND UTF SUPPORT
+

+By default, PCRE2 is built with support for Unicode and UTF character strings. +To build it without Unicode support, add +

+  --disable-unicode
+
+to the configure command. This setting applies to all three libraries. It +is not possible to build one library with Unicode support and another without +in the same configuration. +

+

+Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16 +or UTF-32. To do that, applications that use the library can set the PCRE2_UTF +option when they call pcre2_compile() to compile a pattern. +Alternatively, patterns may be started with (*UTF) unless the application has +locked this out by setting PCRE2_NEVER_UTF. +

+

+UTF support allows the libraries to process character code points up to +0x10ffff in the strings that they handle. Unicode support also gives access to +the Unicode properties of characters, using pattern escapes such as \P, \p, +and \X. Only the general category properties such as Lu and Nd, +script names, and some bi-directional properties are supported. Details are +given in the +pcre2pattern +documentation. +

+

+Pattern escapes such as \d and \w do not by default make use of Unicode +properties. The application can request that they do by setting the PCRE2_UCP +option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also +request this by starting with (*UCP). +

+
DISABLING THE USE OF \C
+

+The \C escape sequence, which matches a single code unit, even in a UTF mode, +can cause unpredictable behaviour because it may leave the current matching +point in the middle of a multi-code-unit character. The application can lock it +out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +pcre2_compile(). There is also a build-time option +

+  --enable-never-backslash-C
+
+(note the upper case C) which locks out the use of \C entirely. +

+
JUST-IN-TIME COMPILER SUPPORT
+

+Just-in-time (JIT) compiler support is included in the build by specifying +

+  --enable-jit
+
+This support is available only for certain hardware architectures. If this +option is set for an unsupported architecture, a build error occurs. +If in doubt, use +
+  --enable-jit=auto
+
+which enables JIT only if the current hardware is supported. You can check +if JIT is enabled in the configuration summary that is output at the end of a +configure run. If you are enabling JIT under SELinux you may also want to +add +
+  --enable-jit-sealloc
+
+which enables the use of an execmem allocator in JIT that is compatible with +SELinux. This has no effect if JIT is not enabled. See the +pcre2jit +documentation for a discussion of JIT usage. When JIT support is enabled, +pcre2grep automatically makes use of it, unless you add +
+  --disable-pcre2grep-jit
+
+to the configure command. +

+
NEWLINE RECOGNITION
+

+By default, PCRE2 interprets the linefeed (LF) character as indicating the end +of a line. This is the normal newline character on Unix-like systems. You can +compile PCRE2 to use carriage return (CR) instead, by adding +

+  --enable-newline-is-cr
+
+to the configure command. There is also an --enable-newline-is-lf option, +which explicitly specifies linefeed as the newline character. +

+

+Alternatively, you can specify that line endings are to be indicated by the +two-character sequence CRLF (CR immediately followed by LF). If you want this, +add +

+  --enable-newline-is-crlf
+
+to the configure command. There is a fourth option, specified by +
+  --enable-newline-is-anycrlf
+
+which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as +indicating a line ending. A fifth option, specified by +
+  --enable-newline-is-any
+
+causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline +sequences are the three just mentioned, plus the single characters VT (vertical +tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line +separator, U+2028), and PS (paragraph separator, U+2029). The final option is +
+  --enable-newline-is-nul
+
+which causes NUL (binary zero) to be set as the default line-ending character. +

+

+Whatever default line ending convention is selected when PCRE2 is built can be +overridden by applications that use the library. At build time it is +recommended to use the standard for your operating system. +

+
WHAT \R MATCHES
+

+By default, the sequence \R in a pattern matches any Unicode newline sequence, +independently of what has been selected as the line ending sequence. If you +specify +

+  --enable-bsr-anycrlf
+
+the default is changed so that \R matches only CR, LF, or CRLF. Whatever is +selected when PCRE2 is built can be overridden by applications that use the +library. +

+
HANDLING VERY LARGE PATTERNS
+

+Within a compiled pattern, offset values are used to point from one part to +another (for example, from an opening parenthesis to an alternation +metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values +are used for these offsets, leading to a maximum size for a compiled pattern of +around 64 thousand code units. This is sufficient to handle all but the most +gigantic patterns. Nevertheless, some people do want to process truly enormous +patterns, so it is possible to compile PCRE2 to use three-byte or four-byte +offsets by adding a setting such as +

+  --with-link-size=3
+
+to the configure command. The value given must be 2, 3, or 4. For the +16-bit library, a value of 3 is rounded up to 4. In these libraries, using +longer offsets slows down the operation of PCRE2 because it has to load +additional data when handling them. For the 32-bit library the value is always +4 and cannot be overridden; the value of --with-link-size is ignored. +

+
LIMITING PCRE2 RESOURCE USAGE
+

+The pcre2_match() function increments a counter each time it goes round +its main loop. Putting a limit on this counter controls the amount of computing +resource used by a single call to pcre2_match(). The limit can be changed +at run time, as described in the +pcre2api +documentation. The default is 10 million, but this can be changed by adding a +setting such as +

+  --with-match-limit=500000
+
+to the configure command. This setting also applies to the +pcre2_dfa_match() matching function, and to JIT matching (though the +counting is done differently). +

+

+The pcre2_match() function uses heap memory to record backtracking +points. The more nested backtracking points there are (that is, the deeper the +search tree), the more memory is needed. There is an upper limit, specified in +kibibytes (units of 1024 bytes). This limit can be changed at run time, as +described in the +pcre2api +documentation. The default limit (in effect unlimited) is 20 million. You can +change this by a setting such as +

+  --with-heap-limit=500
+
+which limits the amount of heap to 500 KiB. This limit applies only to +interpretive matching in pcre2_match() and pcre2_dfa_match(), which +may also use the heap for internal workspace when processing complicated +patterns. This limit does not apply when JIT (which has its own memory +arrangements) is used. +

+

+You can also explicitly limit the depth of nested backtracking in the +pcre2_match() interpreter. This limit defaults to the value that is set +for --with-match-limit. You can set a lower default limit by adding, for +example, +

+  --with-match-limit-depth=10000
+
+to the configure command. This value can be overridden at run time. This +depth limit indirectly limits the amount of heap memory that is used, but +because the size of each backtracking "frame" depends on the number of +capturing parentheses in a pattern, the amount of heap that is used before the +limit is reached varies from pattern to pattern. This limit was more useful in +versions before 10.30, where function recursion was used for backtracking. +

+

+As well as applying to pcre2_match(), the depth limit also controls +the depth of recursive function calls in pcre2_dfa_match(). These are +used for lookaround assertions, atomic groups, and recursion within patterns. +The limit does not apply to JIT matching. +

+
LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS
+

+Lookbehind assertions in which one or more branches can match a variable number +of characters are supported only if there is a maximum matching length for each +top-level branch. There is a limit to this maximum that defaults to 255 +characters. You can alter this default by a setting such as +

+  --with-max-varlookbehind=100
+
+The limit can be changed at runtime by calling +pcre2_set_max_varlookbehind(). Lookbehind assertions in which every +branch matches a fixed number of characters (not necessarily all the same) are +not constrained by this limit. +

+
CREATING CHARACTER TABLES AT BUILD TIME
+

+PCRE2 uses fixed tables for processing characters whose code points are less +than 256. By default, PCRE2 is built with a set of tables that are distributed +in the file src/pcre2_chartables.c.dist. These tables are for ASCII codes +only. If you add +

+  --enable-rebuild-chartables
+
+to the configure command, the distributed tables are no longer used. +Instead, a program called pcre2_dftables is compiled and run. This +outputs the source for a new set of tables, created in the default locale of your +C run-time system. This method of replacing the tables does not work if you are +cross compiling, because pcre2_dftables needs to be run on the local +host and therefore not compiled with the cross compiler. +

+

+If you need to create alternative tables when cross compiling, you will have to +do so "by hand". There may also be other reasons for creating tables manually. +To cause pcre2_dftables to be built on the local host, run a normal +compiling command, and then run the program with the output file as its +argument, for example: +

+  cc src/pcre2_dftables.c -o pcre2_dftables
+  ./pcre2_dftables src/pcre2_chartables.c
+
+This builds the tables in the default locale of the local host. If you want to +specify a locale, you must use the -L option: +
+  LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
+
+You can also specify -b (with or without -L). This causes the tables to be +written in binary instead of as source code. A set of binary tables can be +loaded into memory by an application and passed to pcre2_compile() in the +same way as tables created by calling pcre2_maketables(). The tables are +just a string of bytes, independent of hardware characteristics such as +endianness. This means they can be bundled with an application that runs in +different environments, to ensure consistent behaviour. +

+
USING EBCDIC CODE
+

+PCRE2 assumes by default that it will run in an environment where the character +code is ASCII or Unicode, which is a superset of ASCII. This is the case for +most computer operating systems. PCRE2 can, however, be compiled to run in an +8-bit EBCDIC environment by adding +

+  --enable-ebcdic --disable-unicode
+
+to the configure command. This setting implies +--enable-rebuild-chartables. You should only use it if you know that you are in +an EBCDIC environment (for example, an IBM mainframe operating system). +

+

+It is not possible to support both EBCDIC and UTF-8 codes in the same version +of the library. Consequently, --enable-unicode and --enable-ebcdic are mutually +exclusive. +

+

+The EBCDIC character that corresponds to an ASCII LF is assumed to have the +value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In +such an environment you should use +

+  --enable-ebcdic-nl25
+
+as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the +same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is not +chosen as LF is made to correspond to the Unicode NEL character (which, in +Unicode, is 0x85). +

+

+The options that select newline behaviour, such as --enable-newline-is-cr, +and equivalent run-time options, refer to these character values in an EBCDIC +environment. +

+
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
+

+By default pcre2grep supports the use of callouts with string arguments +within the patterns it is matching. There are two kinds: one that generates +output using local code, and another that calls an external program or script. +If --disable-pcre2grep-callout-fork is added to the configure command, +only the first kind of callout is supported; if --disable-pcre2grep-callout is +used, all callouts are completely ignored. For more details of pcre2grep +callouts, see the +pcre2grep +documentation. +

+
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
+

+By default, pcre2grep reads all files as plain text. You can build it so +that it recognizes files whose names end in .gz or .bz2, and reads +them with libz or libbz2, respectively, by adding one or both of +

+  --enable-pcre2grep-libz
+  --enable-pcre2grep-libbz2
+
+to the configure command. These options naturally require that the +relevant libraries are installed on your system. Configuration will fail if +they are not. +

+
PCRE2GREP BUFFER SIZE
+

+pcre2grep uses an internal buffer to hold a "window" on the file it is +scanning, in order to be able to output "before" and "after" lines when it +finds a match. The default starting size of the buffer is 20KiB. The buffer +itself is three times this size, but because of the way it is used for holding +"before" lines, the longest line that is guaranteed to be processable is the +notional buffer size. If a longer line is encountered, pcre2grep +automatically expands the buffer, up to a specified maximum size, whose default +is 1MiB or the starting size, whichever is the larger. You can change the +default parameter values by adding, for example, +

+  --with-pcre2grep-bufsize=51200
+  --with-pcre2grep-max-bufsize=2097152
+
+to the configure command. The caller of pcre2grep can override +these values by using --buffer-size and --max-buffer-size on the command line. +

+
PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
+

+If you add one of +

+  --enable-pcre2test-libreadline
+  --enable-pcre2test-libedit
+
+to the configure command, pcre2test is linked with the +libreadline or libedit library, respectively, and when its input is +from a terminal, it reads it using the readline() function. This provides +line-editing and history facilities. Note that libreadline is +GPL-licensed, so if you distribute a binary of pcre2test linked in this +way, there may be licensing issues. These can be avoided by linking instead +with libedit, which has a BSD licence. +

+

+Setting --enable-pcre2test-libreadline causes the -lreadline option to be +added to the pcre2test build. In many operating environments with a +system-installed readline library this is sufficient. However, in some +environments (e.g. if an unmodified distribution version of readline is in +use), some extra configuration may be necessary. The INSTALL file for +libreadline says this: +

+  "Readline uses the termcap functions, but does not link with
+  the termcap or curses library itself, allowing applications
+  which link with readline the to choose an appropriate library."
+
+If your environment has not been set up so that an appropriate library is +automatically included, you may need to add something like +
+  LIBS="-lncurses"
+
+immediately before the configure command. +

+
INCLUDING DEBUGGING CODE
+

+If you add +

+  --enable-debug
+
+to the configure command, additional debugging code is included in the +build. This feature is intended for use by the PCRE2 maintainers. +

+
DEBUGGING WITH VALGRIND SUPPORT
+

+If you add +

+  --enable-valgrind
+
+to the configure command, PCRE2 will use valgrind annotations to mark +certain memory regions as unaddressable. This allows it to detect invalid +memory accesses, and is mostly useful for debugging PCRE2 itself. +

+
CODE COVERAGE REPORTING
+

+If your C compiler is gcc, you can build a version of PCRE2 that can generate a +code coverage report for its test suite. To enable this, you must install +lcov version 1.6 or above. Then specify +

+  --enable-coverage
+
+to the configure command and build PCRE2 in the usual way. +

+

+Note that using ccache (a caching C compiler) is incompatible with code +coverage reporting. If you have configured ccache to run automatically +on your system, you must set the environment variable +

+  CCACHE_DISABLE=1
+
+before running make to build PCRE2, so that ccache is not used. +

+

+When --enable-coverage is used, the following additional targets are added to the +Makefile: +

+  make coverage
+
+This creates a fresh coverage report for the PCRE2 test suite. It is equivalent +to running "make coverage-reset", "make coverage-baseline", "make check", and +then "make coverage-report". +
+  make coverage-reset
+
+This zeroes the coverage counters, but does nothing else. +
+  make coverage-baseline
+
+This captures baseline coverage information. +
+  make coverage-report
+
+This creates the coverage report. +
+  make coverage-clean-report
+
+This removes the generated coverage report without cleaning the coverage data +itself. +
+  make coverage-clean-data
+
+This removes the captured coverage data without removing the coverage files +created at compile time (*.gcno). +
+  make coverage-clean
+
+This cleans all coverage data including the generated coverage report. For more +information about code coverage, see the gcov and lcov +documentation. +

+
DISABLING THE Z AND T FORMATTING MODIFIERS
+

+The C99 standard defines formatting modifiers z and t for size_t and +ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in +environments other than old versions of Microsoft Visual Studio when +__STDC_VERSION__ is defined and has a value greater than or equal to 199901L +(indicating support for C99). +However, there is at least one environment that claims to be C99 but does not +support these modifiers. If +

+  --disable-percent-zt
+
+is specified, no use is made of the z or t modifiers. Instead of %td or %zu, +a suitable format is used depending on the size of long for the platform. +

+
SUPPORT FOR FUZZERS
+

+There is a special option for use by people who want to run fuzzing tests on +PCRE2: +

+  --enable-fuzz-support
+
+At present this applies only to the 8-bit library. If set, it causes an extra +library called libpcre2-fuzzsupport.a to be built, but not installed. This +contains a single function called LLVMFuzzerTestOneInput() whose arguments are +a pointer to a string and the length of the string. When called, this function +tries to compile the string as a pattern, and if that succeeds, to match it. +This is done both with no options and with some random option bits that are +generated from the string. +

+

+Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck +to be created. This is normally run under valgrind or used when PCRE2 is +compiled with address sanitizing enabled. It calls the fuzzing function and +outputs information about what it is doing. The input strings are specified by +arguments: if an argument starts with "=" the rest of it is a literal input +string. Otherwise, it is assumed to be a file name, and the contents of the +file are the test string. +

+
OBSOLETE OPTION
+

+In versions of PCRE2 prior to 10.30, there were two ways of handling +backtracking in the pcre2_match() function. The default was to use the +system stack, but if +

+  --disable-stack-for-recursion
+
+was set, memory on the heap was used. From release 10.30 onwards this has +changed (the stack is no longer used) and this option now does nothing except +give a warning. +

+
SEE ALSO
+

+pcre2api(3), pcre2-config(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 16 April 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2callout.html b/3rd/pcre2/doc/html/pcre2callout.html new file mode 100644 index 00000000..cdb65ad6 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2callout.html @@ -0,0 +1,480 @@ + + +pcre2callout specification + + +

pcre2callout man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+#include <pcre2.h> +

+

+int (*pcre2_callout)(pcre2_callout_block *, void *); +
+
+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +

+
DESCRIPTION
+

+PCRE2 provides a feature called "callout", which is a means of temporarily +passing control to the caller of PCRE2 in the middle of pattern matching. The +caller of PCRE2 provides an external function by putting its entry point in +a match context (see pcre2_set_callout() in the +pcre2api +documentation). +

+

+When using the pcre2_substitute() function, an additional callout feature +is available. This does a callout after each change to the subject string and +is described in the +pcre2api +documentation; the rest of this document is concerned with callouts during +pattern matching. +

+

+Within a regular expression, (?C<arg>) indicates a point at which the external +function is to be called. Different callout points can be identified by putting +a number less than 256 after the letter C. The default value is zero. +Alternatively, the argument may be a delimited string. The starting delimiter +must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the +start, except for {, where the ending delimiter is }. If the ending delimiter +is needed within the string, it must be doubled. For example, this pattern has +two callout points: +

+  (?C1)abc(?C"some ""arbitrary"" text")def
+
+If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 +automatically inserts callouts, all with number 255, before each item in the +pattern except for immediately before or after an explicit callout. For +example, if PCRE2_AUTO_CALLOUT is used with the pattern +
+  A(?C3)B
+
+it is processed as if it were +
+  (?C255)A(?C3)B(?C255)
+
+Here is a more complicated example: +
+  A(\d{2}|--)
+
+With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were +
+  (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
+
+Notice that there is a callout before and after each parenthesis and +alternation bar. If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the +condition. Such a callout may also be inserted explicitly, for example: +
+  (?(?C9)(?=a)ab|de)  (?(?C%text%)(?!=d)ab|de)
+
+This applies only to assertion conditions (because they are themselves +independent groups). +

+

+Callouts can be useful for tracking the progress of pattern matching. The +pcre2test +program has a pattern qualifier (/auto_callout) that sets automatic callouts. +When any callouts are present, the output from pcre2test indicates how +the pattern is being matched. This is useful information when you are trying to +optimize the performance of a particular pattern. +

+
MISSING CALLOUTS
+

+You should be aware that, because of optimizations in the way PCRE2 compiles +and matches patterns, callouts sometimes do not happen exactly as you might +expect. +

+
+Auto-possessification +
+

+At compile time, PCRE2 "auto-possessifies" repeated items when it knows that +what follows cannot be part of the repeat. For example, a+[bc] is compiled as +if it were a++[bc]. The pcre2test output when this pattern is compiled +with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string +"aaaa" is: +

+  --->aaaa
+   +0 ^        a+
+   +2 ^   ^    [bc]
+  No match
+
+This indicates that when matching [bc] fails, there is no backtracking into a+ +(because it is being treated as a++) and therefore the callouts that would be +taken for the backtracks do not occur. You can disable the auto-possessify +feature by passing PCRE2_NO_AUTO_POSSESS to pcre2_compile(), or starting +the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this: +
+  --->aaaa
+   +0 ^        a+
+   +2 ^   ^    [bc]
+   +2 ^  ^     [bc]
+   +2 ^ ^      [bc]
+   +2 ^^       [bc]
+  No match
+
+This time, when matching [bc] fails, the matcher backtracks into a+ and tries +again, repeatedly, until a+ itself fails. +

+
+Automatic .* anchoring +
+

+By default, an optimization is applied when .* is the first significant item in +a pattern. If PCRE2_DOTALL is set, so that the dot can match any character, the +pattern is automatically anchored. If PCRE2_DOTALL is not set, a match can +start only after an internal newline or at the beginning of the subject, and +pcre2_compile() remembers this. If a pattern has more than one top-level +branch, automatic anchoring occurs if all branches are anchorable. +

+

+This optimization is disabled, however, if .* is in an atomic group or if there +is a backreference to the capture group in which it appears. It is also +disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of +callouts does not affect it. +

+

+For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT and +applied to the string "aa", the pcre2test output is: +

+  --->aa
+   +0 ^      .*
+   +2 ^ ^    \d
+   +2 ^^     \d
+   +2 ^      \d
+  No match
+
+This shows that all match attempts start at the beginning of the subject. In +other words, the pattern is anchored. You can disable this optimization by +passing PCRE2_NO_DOTSTAR_ANCHOR to pcre2_compile(), or starting the +pattern with (*NO_DOTSTAR_ANCHOR). In this case, the output changes to: +
+  --->aa
+   +0 ^      .*
+   +2 ^ ^    \d
+   +2 ^^     \d
+   +2 ^      \d
+   +0  ^     .*
+   +2  ^^    \d
+   +2  ^     \d
+  No match
+
+This shows more match attempts, starting at the second subject character. +Another optimization, described in the next section, means that there is no +subsequent attempt to match with an empty subject. +

+
+Other optimizations +
+

+Other optimizations that provide fast "no match" results also affect callouts. +For example, if the pattern is +

+  ab(?C4)cd
+
+PCRE2 knows that any matching string must contain the letter "d". If the +subject string is "abyz", the lack of "d" means that matching doesn't ever +start, and the callout is never reached. However, with "abyd", though the +result is still no match, the callout is obeyed. +

+

+For most patterns PCRE2 also knows the minimum length of a matching string, and +will immediately give a "no match" return without actually running a match if +the subject is not long enough, or, for unanchored patterns, if it has been +scanned far enough. +

+

+You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE +option to pcre2_compile(), or by starting the pattern with +(*NO_START_OPT). This slows down the matching process, but does ensure that +callouts such as the example above are obeyed. +

+
THE CALLOUT INTERFACE
+

+During matching, when PCRE2 reaches a callout point, if an external function is +provided in the match context, it is called. This applies to normal, +DFA, and JIT matching. The first argument to the callout function is a pointer +to a pcre2_callout block. The second argument is the void * callout data +that was supplied when the callout was set up by calling +pcre2_set_callout() (see the +pcre2api +documentation). The callout block structure contains the following fields, not +necessarily in this order: +

+  uint32_t      version;
+  uint32_t      callout_number;
+  uint32_t      capture_top;
+  uint32_t      capture_last;
+  uint32_t      callout_flags;
+  PCRE2_SIZE   *offset_vector;
+  PCRE2_SPTR    mark;
+  PCRE2_SPTR    subject;
+  PCRE2_SIZE    subject_length;
+  PCRE2_SIZE    start_match;
+  PCRE2_SIZE    current_position;
+  PCRE2_SIZE    pattern_position;
+  PCRE2_SIZE    next_item_length;
+  PCRE2_SIZE    callout_string_offset;
+  PCRE2_SIZE    callout_string_length;
+  PCRE2_SPTR    callout_string;
+
+The version field contains the version number of the block format. The +current version is 2; the three callout string fields were added for version 1, +and the callout_flags field for version 2. If you are writing an +application that might use an earlier release of PCRE2, you should check the +version number before accessing any of these fields. The version number will +increase in future if more fields are added, but the intention is never to +remove any of the existing fields. +

+
+Fields for numerical callouts +
+

+For a numerical callout, callout_string is NULL, and callout_number +contains the number of the callout, in the range 0-255. This is the number +that follows (?C for callouts that are part of the pattern; it is 255 for +automatically generated callouts. +

+
+Fields for string callouts +
+

+For callouts with string arguments, callout_number is always zero, and +callout_string points to the string that is contained within the compiled +pattern. Its length is given by callout_string_length. Duplicated ending +delimiters that were present in the original pattern string have been turned +into single characters, but there is no other processing of the callout string +argument. An additional code unit containing binary zero is present after the +string, but is not included in the length. The delimiter that was used to start +the string is also stored within the pattern, immediately before the string +itself. You can access this delimiter as callout_string[-1] if you need +it. +

+

+The callout_string_offset field is the code unit offset to the start of +the callout argument string within the original pattern string. This is +provided for the benefit of applications such as script languages that might +need to report errors in the callout string within the pattern. +

+
+Fields for all callouts +
+

+The remaining fields in the callout block are the same for both kinds of +callout. +

+

+The offset_vector field is a pointer to a vector of capturing offsets +(the "ovector"). You may read the elements in this vector, but you must not +change any of them. +

+

+For calls to pcre2_match(), the offset_vector field is not (since +release 10.30) a pointer to the actual ovector that was passed to the matching +function in the match data block. Instead it points to an internal ovector of a +size large enough to hold all possible captured substrings in the pattern. Note +that whenever a recursion or subroutine call within a pattern completes, the +capturing state is reset to what it was before. +

+

+The capture_last field contains the number of the most recently captured +substring, and the capture_top field contains one more than the number of +the highest numbered captured substring so far. If no substrings have yet been +captured, the value of capture_last is 0 and the value of +capture_top is 1. The values of these fields do not always differ by one; +for example, when the callout in the pattern ((a)(b))(?C2) is taken, +capture_last is 1 but capture_top is 4. +

+

+The contents of ovector[2] to ovector[<capture_top>*2-1] can be inspected in +order to extract substrings that have been matched so far, in the same way as +extracting substrings after a match has completed. The values in ovector[0] and +ovector[1] are always PCRE2_UNSET because the match is by definition not +complete. Substrings that have not been captured but whose numbers are less +than capture_top also have both of their ovector slots set to +PCRE2_UNSET. +

+

+For DFA matching, the offset_vector field points to the ovector that was +passed to the matching function in the match data block for callouts at the top +level, but to an internal ovector during the processing of pattern recursions, +lookarounds, and atomic groups. However, these ovectors hold no useful +information because pcre2_dfa_match() does not support substring +capturing. The value of capture_top is always 1 and the value of +capture_last is always 0 for DFA matching. +

+

+The subject and subject_length fields contain copies of the values +that were passed to the matching function. +

+

+The start_match field normally contains the offset within the subject at +which the current match attempt started. However, if the escape sequence \K +has been encountered, this value is changed to reflect the modified starting +point. If the pattern is not anchored, the callout function may be called +several times from the same point in the pattern for different starting points +in the subject. +

+

+The current_position field contains the offset within the subject of the +current match pointer. +

+

+The pattern_position field contains the offset in the pattern string to +the next item to be matched. +

+

+The next_item_length field contains the length of the next item to be +processed in the pattern string. When the callout is at the end of the pattern, +the length is zero. When the callout precedes an opening parenthesis, the +length includes meta characters that follow the parenthesis. For example, in a +callout before an assertion such as (?=ab) the length is 3. For an alternation +bar or a closing parenthesis, the length is one, unless a closing parenthesis +is followed by a quantifier, in which case its length is included. (This +changed in release 10.23. In earlier releases, before an opening parenthesis +the length was that of the entire group, and before an alternation bar or a +closing parenthesis the length was zero.) +

+

+The pattern_position and next_item_length fields are intended to +help in distinguishing between different automatic callouts, which all have the +same callout number. However, they are set for all callouts, and are used by +pcre2test to show the next item to be matched when displaying callout +information. +

+

+In callouts from pcre2_match() the mark field contains a pointer to +the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or +(*THEN) item in the match, or NULL if no such items have been passed. Instances +of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In +callouts from the DFA matching function this field always contains NULL. +

+

+The callout_flags field is always zero in callouts from +pcre2_dfa_match() or when JIT is being used. When pcre2_match() +without JIT is used, the following bits may be set: +

+  PCRE2_CALLOUT_STARTMATCH
+
+This is set for the first callout after the start of matching for each new +starting position in the subject. +
+  PCRE2_CALLOUT_BACKTRACK
+
+This is set if there has been a matching backtrack since the previous callout, +or since the start of matching if this is the first callout from a +pcre2_match() run. +

+

+Both bits are set when a backtrack has caused a "bumpalong" to a new starting +position in the subject. Output from pcre2test does not indicate the +presence of these bits unless the callout_extra modifier is set. +

+

+The information in the callout_flags field is provided so that +applications can track and tell their users how matching with backtracking is +done. This can be useful when trying to optimize patterns, or just to +understand how PCRE2 works. There is no support in pcre2_dfa_match() +because there is no backtracking in DFA matching, and there is no support in +JIT because JIT is all about maximizing matching performance. In both these +cases the callout_flags field is always zero. +

+
RETURN VALUES FROM CALLOUTS
+

+The external callout function returns an integer to PCRE2. If the value is +zero, matching proceeds as normal. If the value is greater than zero, matching +fails at the current point, but the testing of other matching possibilities +goes ahead, just as if a lookahead assertion had failed. If the value is less +than zero, the match is abandoned, and the matching function returns the +negative value. +

+

+Negative values should normally be chosen from the set of PCRE2_ERROR_xxx +values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match" +failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout +functions; it will never be used by PCRE2 itself. +

+
CALLOUT ENUMERATION
+

+int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); +
+
+A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling pcre2_callout_enumerate(). The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +user_data value that was passed to pcre2_callout_enumerate(). The +data block contains the following fields: +

+  version                Block version number
+  pattern_position       Offset to next item in pattern
+  next_item_length       Length of next item in pattern
+  callout_number         Number for numbered callouts
+  callout_string_offset  Offset to string within pattern
+  callout_string_length  Length of callout string
+  callout_string         Points to callout string or is NULL
+
+The version number is currently 0. It will increase if new fields are ever +added to the block. The remaining fields are the same as their namesakes in the +pcre2_callout block that is used for callouts during matching, as +described +above. +

+

+Note that the value of pattern_position is unique for each callout. +However, if a callout occurs inside a group that is quantified with a non-zero +minimum or a fixed maximum, the group is replicated inside the compiled +pattern. For example, a pattern such as /(a){2}/ is compiled as if it were +/(a)(a)/. This means that the callout will be enumerated more than once, but +with the same value for pattern_position in each case. +

+

+The callback function should normally return zero. If it returns a non-zero +value, scanning the pattern stops, and that value is returned from +pcre2_callout_enumerate(). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 19 January 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2compat.html b/3rd/pcre2/doc/html/pcre2compat.html new file mode 100644 index 00000000..5f7e280d --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2compat.html @@ -0,0 +1,299 @@ + + +pcre2compat specification + + +

pcre2compat man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+DIFFERENCES BETWEEN PCRE2 AND PERL +
+

+This document describes some of the known differences in the ways that PCRE2 +and Perl handle regular expressions. The differences described here are with +respect to Perl version 5.38.0, but as both Perl and PCRE2 are continually +changing, the information may at times be out of date. +

+

+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +

+

+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +have are given in the +pcre2unicode +page. +

+

+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +they do not mean what you might think. For example, (?!a){3} does not assert +that the next three characters are not "a". It just asserts that the next +character is not "a" three times (in principle; PCRE2 optimizes this to run the +assertion just once). Perl allows some repeat quantifiers on other assertions, +for example, \b* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. +

+

+4. If a braced quantifier such as {1,2} appears where there is nothing to +repeat (for example, at the start of a branch), PCRE2 raises an error whereas +Perl treats the quantifier characters as literal. +

+

+5. Capture groups that occur inside negative lookaround assertions are counted, +but their entries in the offsets vector are set only when a negative assertion +is a condition that has a matching branch (that is, the condition is false). +Perl may set such capture groups in other circumstances. +

+

+6. The following Perl escape sequences are not supported: \F, \l, \L, \u, +\U, and \N when followed by a character name. \N on its own, matching a +non-newline character, and \N{U+dd..}, matching a Unicode code point, are +supported. The escapes that modify the case of following letters are +implemented by Perl's general string-handling and are not part of its pattern +matching engine. If any of these are encountered by PCRE2, an error is +generated by default. However, if either of the PCRE2_ALT_BSUX or +PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript +interprets them. +

+

+7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is +built with Unicode support (the default). The properties that can be tested +with \p and \P are limited to the general category properties such as Lu and +Nd, the derived properties Any and Lc (synonym L&), script names such as Greek +or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and +Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See +the +pcre2pattern +documentation for details. The long synonyms for property names that Perl +supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted +to prefix any of these properties with "Is". +

+

+8. PCRE2 supports the \Q...\E escape for quoting substrings. Characters +in between are treated as literals. However, this is slightly different from +Perl in that $ and @ are also handled as literals inside the quotes. In Perl, +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \Q +and \E which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \Q and \E just like any other character. Note the +following examples: +

+    Pattern            PCRE2 matches     Perl matches
+
+    \Qabc$xyz\E        abc$xyz           abc followed by the contents of $xyz
+    \Qabc\$xyz\E       abc\$xyz          abc\$xyz
+    \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz
+    \QA\B\E            A\B               A\B
+    \Q\\E              \                 \\E
+
+The \Q...\E sequence is recognized both inside and outside character classes +by both PCRE2 and Perl. Another difference from Perl is that any appearance of +\Q or \E inside what might otherwise be a quantifier causes PCRE2 not to +recognize the sequence as a quantifier. Perl recognizes a quantifier if +(redundantly) either of the numbers is inside \Q...\E, but not if the +separating comma is. When not recognized as a quantifier a sequence such as +{\Q1\E,2} is treated as the literal string "{1,2}". +

+

+9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +constructions. However, PCRE2 does have a "callout" feature, which allows an +external function to be called during pattern matching. See the +pcre2callout +documentation for details. +

+

+10. Subroutine calls (whether recursive or not) were treated as atomic groups +up to PCRE2 release 10.23, but from release 10.30 this changed, and +backtracking into subroutine calls is now supported, as in Perl. +

+

+11. In PCRE2, if any of the backtracking control verbs are used in a group that +is called as a subroutine (whether or not recursively), their effect is +confined to that group; it does not extend to the surrounding pattern. This is +not always the case in Perl. In particular, if (*THEN) is present in a group +that is called as a subroutine, its action is limited to that group, even if +the group does not contain any | characters. Note that such groups are +processed as anchored at the point where they are tested. PCRE2 also confines +all control verbs within atomic assertions, again including (*THEN) in +assertions with only one branch. +

+

+12. If a pattern contains more than one backtracking control verb, the first +one that is backtracked onto acts. For example, in the pattern +A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C +triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the +same as PCRE2, but there are cases where it differs. +

+

+13. There are some differences that are concerned with the settings of captured +strings when part of a pattern is repeated. For example, matching "aba" against +the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to +"b". +

+

+14. PCRE2's handling of duplicate capture group numbers and names is not as +general as Perl's. This is a consequence of the fact that PCRE2 works internally +just with numbers, using an external table to translate between numbers and +names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two +capture groups have the same number but different names, is not supported, and +causes an error at compile time. If it were allowed, it would not be possible +to distinguish which group matched, because both names map to capture group +number 1. To avoid this confusing situation, an error is given at compile time. +

+

+15. Perl used to recognize comments in some places that PCRE2 does not, for +example, between the ( and ? at the start of a group. If the /x modifier is +set, Perl allowed white space between ( and ? though the latest Perls give an +error (for a while it was just deprecated). There may still be some cases where +Perl behaves differently. +

+

+16. Perl, when in warning mode, gives warnings for character classes such as +[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no +warning features, so it gives an error in these cases because they are almost +certainly user mistakes. +

+

+17. In PCRE2, until release 10.45, the upper/lower case character properties Lu +and Ll were not affected when case-independent matching was specified. Perl has +changed in this respect, and PCRE2 has now changed to match. When caseless +matching is in force, Lu, Ll, and Lt (title case) are all treated as Lc (cased +letter). +

+

+18. From release 5.32.0, Perl locks out the use of \K in lookaround +assertions. From release 10.38 PCRE2 does the same by default. However, there +is an option for re-enabling the previous behaviour. When this option is set, +\K is acted on when it occurs in positive assertions, but is ignored in +negative assertions. +

+

+19. PCRE2 provides some extensions to the Perl regular expression facilities. +Perl 5.10 included new features that were not in earlier versions of Perl, some +of which (such as named parentheses) were in PCRE2 for some time before. This +list is with respect to Perl 5.38: +
+
+(a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ +meta-character matches only at the very end of the string. +
+
+(b) A backslash followed by a letter with no special meaning is faulted. (Perl +can be made to issue a warning.) +
+
+(c) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is +inverted, that is, by default they are not greedy, but if followed by a +question mark they are. +
+
+(d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried +only at the first matching position in the subject string. +
+
+(e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART +options have no Perl equivalents. +
+
+(f) The \R escape sequence can be restricted to match only CR, LF, or CRLF +by the PCRE2_BSR_ANYCRLF option. +
+
+(g) The callout facility is PCRE2-specific. Perl supports codeblocks and +variable interpolation, but not general hooks on every match. +
+
+(h) The partial matching facility is PCRE2-specific. +
+
+(i) The alternative matching function (pcre2_dfa_match()) matches in a +different way and is not Perl-compatible. +
+
+(j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at +the start of a pattern. These set overall options that cannot be changed within +the pattern. +
+
+(k) PCRE2 supports non-atomic positive lookaround assertions. This is an +extension to the lookaround facilities. The default, Perl-compatible +lookarounds are atomic. +
+
+(l) There are three syntactical items in patterns that can refer to a capturing +group by number: back references such as \g{2}, subroutine calls such as (?3), +and condition references such as (?(4)...). PCRE2 supports relative group +numbers such as +2 and -4 in all three cases. Perl supports both plus and minus +for subroutine calls, but only minus for back references, and no relative +numbering at all for conditions. +
+
+(m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 extension +that is not available in Perl. +

+

+20. Perl has different limits than PCRE2. See the +pcre2limit +documentation for details. In release 5.10, Perl changed from recursion to iteration, +keeping the intermediate matches on the heap, which is ~10% slower but does not +run into any stack-overflow limit. PCRE2 made a similar change at release +10.30, and also has many build-time and run-time customizable limits. +

+

+21. Unlike Perl, PCRE2 doesn't have character set modifiers and especially no way +to set characters by context just like Perl's "/d". A regular expression using +PCRE2_UTF and PCRE2_UCP will use similar rules to Perl's "/u"; something closer +to "/a" could be selected by adding other PCRE2_EXTRA_ASCII* options on top. +

+

+22. Some recursive patterns that Perl diagnoses as infinite recursions can be +handled by PCRE2, either by the interpreter or the JIT. An example is +/(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated +"abcd" substrings at the end of the subject. +

+

+23. Both PCRE2 and Perl error when \x{ escapes are invalid, but Perl tries to +recover and prints a warning if the problem was that an invalid hexadecimal +digit was found; since PCRE2 doesn't have warnings, it returns an error instead. +Additionally, Perl accepts \x{} and generates NUL unlike PCRE2. +

+

+24. From release 10.45, PCRE2 gives an error if \x is not followed by a +hexadecimal digit or a curly bracket. It used to interpret this as the NUL +character. Perl still generates NUL, but warns when in warning mode in most +cases. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 02 October 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2convert.html b/3rd/pcre2/doc/html/pcre2convert.html new file mode 100644 index 00000000..57e8989f --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2convert.html @@ -0,0 +1,191 @@ + + +pcre2convert specification + + +

pcre2convert man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
EXPERIMENTAL PATTERN CONVERSION FUNCTIONS
+

+This document describes a set of functions that can be used to convert +"foreign" patterns into PCRE2 regular expressions. This facility is currently +experimental, and may be changed in future releases. Two kinds of pattern, +globs and POSIX patterns, are supported. +

+
THE CONVERT CONTEXT
+

+pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); +
+
+pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); +
+
+void pcre2_convert_context_free(pcre2_convert_context *cvcontext); +
+
+int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); +
+
+int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); +
+
+A convert context is used to hold parameters that affect the way that pattern +conversion works. Like all PCRE2 contexts, you need to use a context only if +you want to override the defaults. There are the usual create, copy, and free +functions. If custom memory management functions are set in a general context +that is passed to pcre2_convert_context_create(), they are used for all +memory management within the conversion functions. +

+

+There are only two parameters in the convert context at present. Both apply +only to glob conversions. The escape character defaults to grave accent under +Windows, otherwise backslash. It can be set to zero, meaning no escape +character, or to any punctuation character with a code point less than 256. +The separator character defaults to backslash under Windows, otherwise forward +slash. It can be set to forward slash, backslash, or dot. +

+

+The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if +their second argument is invalid. +

+
THE CONVERSION FUNCTION
+

+int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); +
+
+void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); +
+
+The first two arguments of pcre2_pattern_convert() define the foreign +pattern that is to be converted. The length may be given as +PCRE2_ZERO_TERMINATED. The options argument defines how the pattern is to +be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set. +PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid. +One or more of the glob options, or one of the following POSIX options must be +set to define the type of conversion that is required: +

+  PCRE2_CONVERT_GLOB
+  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+  PCRE2_CONVERT_GLOB_NO_STARSTAR
+  PCRE2_CONVERT_POSIX_BASIC
+  PCRE2_CONVERT_POSIX_EXTENDED
+
+Details of the conversions are given below. The buffer and blength +arguments define how the output is handled: +

+

+If buffer is NULL, the function just returns the length of the converted +pattern via blength. This is one less than the length of buffer needed, +because a terminating zero is always added to the output. +

+

+If buffer points to a NULL pointer, an output buffer is obtained using +the allocator in the context or malloc() if no context is supplied. A +pointer to this buffer is placed in the variable to which buffer points. +When no longer needed the output buffer must be freed by calling +pcre2_converted_pattern_free(). If this function is called with a NULL +argument, it returns immediately without doing anything. +

+

+If buffer points to a non-NULL pointer, blength must be set to the +actual length of the buffer provided (in code units). +

+

+In all cases, after successful conversion, the variable pointed to by +blength is updated to the length actually used (in code units), excluding +the terminating zero that is always added. +

+

+If an error occurs, the length (via blength) is set to the offset +within the input pattern where the error was detected. Only gross syntax errors +are caught; there are plenty of errors that will get passed on for +pcre2_compile() to discover. +

+

+The return from pcre2_pattern_convert() is zero on success or a non-zero +PCRE2 error code. Note that PCRE2 error codes may be positive or negative: +pcre2_compile() uses mostly positive codes and pcre2_match() +negative ones; pcre2_pattern_convert() uses existing codes of both kinds. A +textual error message can be obtained by calling +pcre2_get_error_message(). +

+
CONVERTING GLOBS
+

+Globs are used to match file names, and consequently have the concept of a +"path separator", which defaults to backslash under Windows and forward slash +otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not +permitted to match separator characters, but the double-star (**) feature +(which does match separators) is supported. +

+

+PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to +match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with +the double-star feature disabled. These options may be given together. +

+
CONVERTING POSIX PATTERNS
+

+POSIX defines two kinds of regular expression pattern: basic and extended. +These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or +PCRE2_CONVERT_POSIX_EXTENDED, respectively. +

+

+In POSIX patterns, backslash is not special in a character class. Unmatched +closing parentheses are treated as literals. +

+

+In basic patterns, ? + | {} and () must be escaped to be recognized +as metacharacters outside a character class. If the first character in the +pattern is * it is treated as a literal. ^ is a metacharacter only at the start +of a branch. +

+

+In extended patterns, a backslash not in a character class always +makes the next character literal, whatever it is. There are no backreferences. +

+

+Note: POSIX mandates that the longest possible match at the first matching +position must be found. This is not what pcre2_match() does; it yields +the first match that is found. An application can use pcre2_dfa_match() +to find the longest match, but that does not support backreferences (but then +neither do POSIX extended patterns). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 14 November 2023 +
+Copyright © 1997-2018 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2demo.html b/3rd/pcre2/doc/html/pcre2demo.html new file mode 100644 index 00000000..1cb7e0a7 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2demo.html @@ -0,0 +1,518 @@ + + +pcre2demo specification + + +

pcre2demo man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SOURCE CODE +
+

+/*************************************************
+*           PCRE2 DEMONSTRATION PROGRAM          *
+*************************************************/
+
+/* This is a demonstration program to illustrate a straightforward way of
+using the PCRE2 regular expression library from a C program. See the
+pcre2sample documentation for a short discussion ("man pcre2sample" if you have
+the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
+incompatible with the original PCRE API.
+
+There are actually three libraries, each supporting a different code unit
+width. This demonstration program uses the 8-bit library. The default is to
+process each code unit as a separate character, but if the pattern begins with
+"(*UTF)", both it and the subject are treated as UTF-8 strings, where
+characters may occupy multiple code units.
+
+In Unix-like environments, if PCRE2 is installed in your standard system
+libraries, you should be able to compile this program using this command:
+
+cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo
+
+If PCRE2 is not installed in a standard place, it is likely to be installed
+with support for the pkg-config mechanism. If you have pkg-config, you can
+compile this program using this command:
+
+cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo
+
+If you do not have pkg-config, you may have to use something like this:
+
+cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \
+  -R/usr/local/lib -lpcre2-8 -o pcre2demo
+
+Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
+library files for PCRE2 are installed on your system. Only some operating
+systems (Solaris is one) use the -R option.
+
+Building under Windows:
+
+If you want to statically link this program against a non-dll .a file, you must
+define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment
+the following line. */
+
+/* #define PCRE2_STATIC */
+
+/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+For a program that uses only one code unit width, setting it to 8, 16, or 32
+makes it possible to use generic function names such as pcre2_compile(). Note
+that just changing 8 to 16 (for example) is not sufficient to convert this
+program to process 16-bit characters. Even in a fully 16-bit environment, where
+string-handling functions such as strcmp() and printf() work with 16-bit
+characters, the code for handling the table of named substrings will still need
+to be modified. */
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <stdio.h>
+#include <string.h>
+#include <pcre2.h>
+
+
+/**************************************************************************
+* Here is the program. The API includes the concept of "contexts" for     *
+* setting up unusual interface requirements for compiling and matching,   *
+* such as custom memory managers and non-standard newline definitions.    *
+* This program does not do any of this, so it makes no use of contexts,   *
+* always passing NULL where a context could be given.                     *
+**************************************************************************/
+
+/* Entry point of the demonstration program.
+
+Usage: pcre2demo [-g] <pattern> <subject>
+
+Arguments:
+  argc, argv  the usual C command line. Leading arguments that start with a
+              hyphen are options; "-g" is the only one recognised. Exactly
+              two further arguments must follow: the pattern and the subject.
+
+Returns:      0 if the pattern matched at least once and all output was
+              produced; 1 for a bad command line, a pattern that fails to
+              compile, a failure to obtain the match data block, no match, a
+              matching error, or a match whose start lies after its end. */
+
+int main(int argc, char **argv)
+{
+pcre2_code *re;
+PCRE2_SPTR pattern;     /* PCRE2_SPTR is a pointer to unsigned code units of */
+PCRE2_SPTR subject;     /* the appropriate width (in this case, 8 bits). */
+PCRE2_SPTR name_table = NULL;   /* Set below only if there are named groups */
+
+int crlf_is_newline;
+int errornumber;
+int find_all;
+int i;
+int rc;
+int utf8;
+
+uint32_t option_bits;
+uint32_t namecount;
+uint32_t name_entry_size = 0;   /* Set below only if there are named groups */
+uint32_t newline;
+
+PCRE2_SIZE erroroffset;
+PCRE2_SIZE *ovector;
+PCRE2_SIZE subject_length;
+
+pcre2_match_data *match_data;
+
+
+/**************************************************************************
+* First, sort out the command line. There is only one possible option at  *
+* the moment, "-g" to request repeated matching to find all occurrences,  *
+* like Perl's /g option. We set the variable find_all to a non-zero value *
+* if the -g option is present.                                            *
+**************************************************************************/
+
+find_all = 0;
+for (i = 1; i < argc; i++)
+  {
+  if (strcmp(argv[i], "-g") == 0) find_all = 1;
+  else if (argv[i][0] == '-')
+    {
+    printf("Unrecognised option %s\n", argv[i]);
+    return 1;
+    }
+  else break;
+  }
+
+/* After the options, we require exactly two arguments, which are the pattern,
+and the subject string. */
+
+if (argc - i != 2)
+  {
+  printf("Exactly two arguments required: a regex and a subject string\n");
+  return 1;
+  }
+
+/* Pattern and subject are char arguments, so they can be straightforwardly
+cast to PCRE2_SPTR because we are working in 8-bit code units. The subject
+length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact
+defined to be size_t. */
+
+pattern = (PCRE2_SPTR)argv[i];
+subject = (PCRE2_SPTR)argv[i+1];
+subject_length = (PCRE2_SIZE)strlen((char *)subject);
+
+
+/*************************************************************************
+* Now we are going to compile the regular expression pattern, and handle *
+* any errors that are detected.                                          *
+*************************************************************************/
+
+re = pcre2_compile(
+  pattern,               /* the pattern */
+  PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
+  0,                     /* default options */
+  &errornumber,          /* for error number */
+  &erroroffset,          /* for error offset */
+  NULL);                 /* use default compile context */
+
+/* Compilation failed: print the error message and exit. */
+
+if (re == NULL)
+  {
+  PCRE2_UCHAR buffer[256];
+  pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
+  printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
+    buffer);
+  return 1;
+  }
+
+
+/*************************************************************************
+* If the compilation succeeded, we call PCRE2 again, in order to do a    *
+* pattern match against the subject string. This does just ONE match. If *
+* further matching is needed, it will be done below. Before running the  *
+* match we must set up a match_data block for holding the result. Using  *
+* pcre2_match_data_create_from_pattern() ensures that the block is       *
+* exactly the right size for the number of capturing parentheses in the  *
+* pattern. If you need to know the actual size of a match_data block as  *
+* a number of bytes, you can find it like this:                          *
+*                                                                        *
+* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data);    *
+*************************************************************************/
+
+match_data = pcre2_match_data_create_from_pattern(re, NULL);
+
+/* The block is obtained dynamically, so the request can fail. Report that
+here instead of letting pcre2_match() reject the NULL pointer later with a
+less helpful error number. */
+
+if (match_data == NULL)
+  {
+  printf("Failed to obtain memory for the match data block\n");
+  pcre2_code_free(re);
+  return 1;
+  }
+
+/* Now run the match. */
+
+rc = pcre2_match(
+  re,                   /* the compiled pattern */
+  subject,              /* the subject string */
+  subject_length,       /* the length of the subject */
+  0,                    /* start at offset 0 in the subject */
+  0,                    /* default options */
+  match_data,           /* block for storing the result */
+  NULL);                /* use default match context */
+
+/* Matching failed: handle error cases */
+
+if (rc < 0)
+  {
+  switch(rc)
+    {
+    case PCRE2_ERROR_NOMATCH: printf("No match\n"); break;
+    /*
+    Handle other special cases if you like
+    */
+    default: printf("Matching error %d\n", rc); break;
+    }
+  pcre2_match_data_free(match_data);   /* Release memory used for the match */
+  pcre2_code_free(re);                 /*   data and the compiled pattern. */
+  return 1;
+  }
+
+/* Match succeeded. Get a pointer to the output vector, where string offsets
+are stored. */
+
+ovector = pcre2_get_ovector_pointer(match_data);
+printf("Match succeeded at offset %d\n", (int)ovector[0]);
+
+
+/*************************************************************************
+* We have found the first match within the subject string. If the output *
+* vector wasn't big enough, say so. Then output any substrings that were *
+* captured.                                                              *
+*************************************************************************/
+
+/* The output vector wasn't big enough. This should not happen, because we used
+pcre2_match_data_create_from_pattern() above. */
+
+if (rc == 0)
+  printf("ovector was not big enough for all the captured substrings\n");
+
+/* Since release 10.38 PCRE2 has locked out the use of \K in lookaround
+assertions. However, there is an option to re-enable the old behaviour. If that
+is set, it is possible to run patterns such as /(?=.\K)/ that use \K in an
+assertion to set the start of a match later than its end. In this demonstration
+program, we show how to detect this case, but it shouldn't arise because the
+option is never set. */
+
+if (ovector[0] > ovector[1])
+  {
+  printf("\\K was used in an assertion to set the match start after its end.\n"
+    "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
+      (char *)(subject + ovector[1]));
+  printf("Run abandoned\n");
+  pcre2_match_data_free(match_data);
+  pcre2_code_free(re);
+  return 1;
+  }
+
+/* Show substrings stored in the output vector by number. Obviously, in a real
+application you might want to do things other than print them. */
+
+for (i = 0; i < rc; i++)
+  {
+  PCRE2_SIZE substring_start = ovector[2*i];
+  PCRE2_SIZE substring_end = ovector[2*i+1];
+
+  /* A group that took no part in the match (for example, the first group of
+  /(a)|(b)/ when "b" is matched) has both offsets set to PCRE2_UNSET. Show it
+  as an empty string rather than forming a pointer outside the subject. */
+
+  if (substring_start == PCRE2_UNSET) substring_start = substring_end = 0;
+  printf("%2d: %.*s\n", i, (int)(substring_end - substring_start),
+    (char *)(subject + substring_start));
+  }
+
+
+/**************************************************************************
+* That concludes the basic part of this demonstration program. We have    *
+* compiled a pattern, and performed a single match. The code that follows *
+* shows first how to access named substrings, and then how to code for    *
+* repeated matches on the same subject.                                   *
+**************************************************************************/
+
+/* See if there are any named substrings, and if so, show them by name. First
+we have to extract the count of named parentheses from the pattern. */
+
+(void)pcre2_pattern_info(
+  re,                   /* the compiled pattern */
+  PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */
+  &namecount);          /* where to put the answer */
+
+if (namecount == 0) printf("No named substrings\n"); else
+  {
+  PCRE2_SPTR tabptr;
+  printf("Named substrings\n");
+
+  /* Before we can access the substrings, we must extract the table for
+  translating names to numbers, and the size of each entry in the table. */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMETABLE,     /* address of the table */
+    &name_table);             /* where to put the answer */
+
+  (void)pcre2_pattern_info(
+    re,                       /* the compiled pattern */
+    PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
+    &name_entry_size);        /* where to put the answer */
+
+  /* Now we can scan the table and, for each entry, print the number, the name,
+  and the substring itself. In the 8-bit library the number is held in two
+  bytes, most significant first. The field width for the name is passed to
+  printf() via "*", which requires an int argument, hence the cast. As above,
+  a group that did not take part in the match is shown as an empty string. */
+
+  tabptr = name_table;
+  for (i = 0; i < (int)namecount; i++)
+    {
+    int n = (tabptr[0] << 8) | tabptr[1];
+    PCRE2_SIZE group_start = ovector[2*n];
+    PCRE2_SIZE group_end = ovector[2*n+1];
+    if (group_start == PCRE2_UNSET) group_start = group_end = 0;
+    printf("(%d) %*s: %.*s\n", n, (int)(name_entry_size - 3),
+      (char *)(tabptr + 2), (int)(group_end - group_start),
+      (char *)(subject + group_start));
+    tabptr += name_entry_size;
+    }
+  }
+
+
+/*************************************************************************
+* If the "-g" option was given on the command line, we want to continue  *
+* to search for additional matches in the subject string, in a similar   *
+* way to the /g option in Perl. This turns out to be trickier than you   *
+* might think because of the possibility of matching an empty string.    *
+* What happens is as follows:                                            *
+*                                                                        *
+* If the previous match was NOT for an empty string, we can just start   *
+* the next match at the end of the previous one.                         *
+*                                                                        *
+* If the previous match WAS for an empty string, we can't do that, as it *
+* would lead to an infinite loop. Instead, a call of pcre2_match() is    *
+* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The *
+* first of these tells PCRE2 that an empty string at the start of the    *
+* subject is not a valid match; other possibilities must be tried. The   *
+* second flag restricts PCRE2 to one match attempt at the initial string *
+* position. If this match succeeds, an alternative to the empty string   *
+* match has been found, and we can print it and proceed round the loop,  *
+* advancing by the length of whatever was found. If this match does not  *
+* succeed, we still stay in the loop, advancing by just one character.   *
+* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be  *
+* more than one byte.                                                    *
+*                                                                        *
+* However, there is a complication concerned with newlines. When the     *
+* newline convention is such that CRLF is a valid newline, we must       *
+* advance by two characters rather than one. The newline convention can  *
+* be set in the regex by (*CR), etc.; if not, we must find the default.  *
+*************************************************************************/
+
+if (!find_all)     /* Check for -g */
+  {
+  pcre2_match_data_free(match_data);  /* Release the memory that was used */
+  pcre2_code_free(re);                /* for the match data and the pattern. */
+  return 0;                           /* Exit the program. */
+  }
+
+/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
+sequence. First, find the options with which the regex was compiled and extract
+the UTF state. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits);
+utf8 = (option_bits & PCRE2_UTF) != 0;
+
+/* Now find the newline convention and see whether CRLF is a valid newline
+sequence. */
+
+(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
+crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
+                  newline == PCRE2_NEWLINE_CRLF ||
+                  newline == PCRE2_NEWLINE_ANYCRLF;
+
+/* Loop for second and subsequent matches */
+
+for (;;)
+  {
+  uint32_t options = 0;                   /* Normally no options */
+  PCRE2_SIZE start_offset = ovector[1];   /* Start at end of previous match */
+
+  /* If the previous match was for an empty string, we are finished if we are
+  at the end of the subject. Otherwise, arrange to run another match at the
+  same point to see if a non-empty match can be found. */
+
+  if (ovector[0] == ovector[1])
+    {
+    if (ovector[0] == subject_length) break;
+    options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+    }
+
+  /* If the previous match was not an empty string, there is one tricky case to
+  consider. If a pattern contains \K within a lookbehind assertion at the
+  start, the end of the matched string can be at the offset where the match
+  started. Without special action, this leads to a loop that keeps on matching
+  the same substring. We must detect this case and arrange to move the start on
+  by one character. The pcre2_get_startchar() function returns the starting
+  offset that was passed to pcre2_match(). */
+
+  else
+    {
+    PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
+    if (start_offset <= startchar)
+      {
+      if (startchar >= subject_length) break;   /* Reached end of subject.   */
+      start_offset = startchar + 1;             /* Advance by one character. */
+      if (utf8)                                 /* If UTF-8, it may be more  */
+        {                                       /*   than one code unit.     */
+        for (; start_offset < subject_length; start_offset++)
+          if ((subject[start_offset] & 0xc0) != 0x80) break;
+        }
+      }
+    }
+
+  /* Run the next matching operation */
+
+  rc = pcre2_match(
+    re,                   /* the compiled pattern */
+    subject,              /* the subject string */
+    subject_length,       /* the length of the subject */
+    start_offset,         /* starting offset in the subject */
+    options,              /* options */
+    match_data,           /* block for storing the result */
+    NULL);                /* use default match context */
+
+  /* This time, a result of NOMATCH isn't an error. If the value in "options"
+  is zero, it just means we have found all possible matches, so the loop ends.
+  Otherwise, it means we have failed to find a non-empty-string match at a
+  point where there was a previous empty-string match. In this case, we do what
+  Perl does: advance the matching position by one character, and continue. We
+  do this by setting the "end of previous match" offset, because that is picked
+  up at the top of the loop as the point at which to start again.
+
+  There are two complications: (a) When CRLF is a valid newline sequence, and
+  the current position is just before it, advance by an extra byte. (b)
+  Otherwise we must ensure that we skip an entire UTF character if we are in
+  UTF mode. */
+
+  if (rc == PCRE2_ERROR_NOMATCH)
+    {
+    if (options == 0) break;                    /* All matches found */
+    ovector[1] = start_offset + 1;              /* Advance one code unit */
+    if (crlf_is_newline &&                      /* If CRLF is a newline & */
+        start_offset < subject_length - 1 &&    /* we are at CRLF, */
+        subject[start_offset] == '\r' &&
+        subject[start_offset + 1] == '\n')
+      ovector[1] += 1;                          /* Advance by one more. */
+    else if (utf8)                              /* Otherwise, ensure we */
+      {                                         /* advance a whole UTF-8 */
+      while (ovector[1] < subject_length)       /* character. */
+        {
+        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
+        ovector[1] += 1;
+        }
+      }
+    continue;    /* Go round the loop again */
+    }
+
+  /* Other matching errors are not recoverable. */
+
+  if (rc < 0)
+    {
+    printf("Matching error %d\n", rc);
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* Match succeeded */
+
+  printf("\nMatch succeeded again at offset %d\n", (int)ovector[0]);
+
+  /* The match succeeded, but the output vector wasn't big enough. This
+  should not happen. */
+
+  if (rc == 0)
+    printf("ovector was not big enough for all the captured substrings\n");
+
+  /* We must guard against patterns such as /(?=.\K)/ that use \K in an
+  assertion to set the start of a match later than its end. In this
+  demonstration program, we just detect this case and give up. */
+
+  if (ovector[0] > ovector[1])
+    {
+    printf("\\K was used in an assertion to set the match start after its end.\n"
+      "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
+        (char *)(subject + ovector[1]));
+    printf("Run abandoned\n");
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
+  /* As before, show substrings stored in the output vector by number, and then
+  also any named substrings. Groups that did not take part in the match have
+  their offsets set to PCRE2_UNSET and are shown as empty strings. */
+
+  for (i = 0; i < rc; i++)
+    {
+    PCRE2_SIZE substring_start = ovector[2*i];
+    PCRE2_SIZE substring_end = ovector[2*i+1];
+    if (substring_start == PCRE2_UNSET) substring_start = substring_end = 0;
+    printf("%2d: %.*s\n", i, (int)(substring_end - substring_start),
+      (char *)(subject + substring_start));
+    }
+
+  if (namecount == 0) printf("No named substrings\n"); else
+    {
+    PCRE2_SPTR tabptr = name_table;
+    printf("Named substrings\n");
+    for (i = 0; i < (int)namecount; i++)
+      {
+      int n = (tabptr[0] << 8) | tabptr[1];
+      PCRE2_SIZE group_start = ovector[2*n];
+      PCRE2_SIZE group_end = ovector[2*n+1];
+      if (group_start == PCRE2_UNSET) group_start = group_end = 0;
+      printf("(%d) %*s: %.*s\n", n, (int)(name_entry_size - 3),
+        (char *)(tabptr + 2), (int)(group_end - group_start),
+        (char *)(subject + group_start));
+      tabptr += name_entry_size;
+      }
+    }
+  }      /* End of loop to find second and subsequent matches */
+
+printf("\n");
+pcre2_match_data_free(match_data);
+pcre2_code_free(re);
+return 0;
+}
+
+/* End of pcre2demo.c */
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2grep.html b/3rd/pcre2/doc/html/pcre2grep.html new file mode 100644 index 00000000..66c56029 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2grep.html @@ -0,0 +1,1135 @@ + + +pcre2grep specification + + +

pcre2grep man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+pcre2grep [options] [long options] [pattern] [path1 path2 ...] +

+
DESCRIPTION
+

+pcre2grep searches files for character patterns, in the same way as other +grep commands do, but it uses the PCRE2 regular expression library to support +patterns that are compatible with the regular expressions of Perl 5. See +pcre2syntax(3) +for a quick-reference summary of pattern syntax, or +pcre2pattern(3) +for a full description of the syntax and semantics of the regular expressions +that PCRE2 supports. +

+

+Patterns, whether supplied on the command line or in a separate file, are given +without delimiters. For example: +

+  pcre2grep Thursday /etc/motd
+
+If you attempt to use delimiters (for example, by surrounding a pattern with +slashes, as is common in Perl scripts), they are interpreted as part of the +pattern. Quotes can of course be used to delimit patterns on the command line +because they are interpreted by the shell, and indeed quotes are required if a +pattern contains white space or shell metacharacters. +

+

+The first argument that follows any option settings is treated as the single +pattern to be matched when neither -e nor -f is present. +Conversely, when one or both of these options are used to specify patterns, all +arguments are treated as path names. At least one of -e, -f, or an +argument pattern must be provided. +

+

+If no files are specified, pcre2grep reads the standard input. The +standard input can also be referenced by a name consisting of a single hyphen. +For example: +

+  pcre2grep some-pattern file1 - file3
+
+By default, input files are searched line by line, so pattern assertions about +the beginning and end of a subject string (^, $, \A, \Z, and \z) match at +the beginning and end of each line. When a line matches a pattern, it is copied +to the standard output, and if there is more than one file, the file name is +output at the start of each line, followed by a colon. However, there are +options that can change how pcre2grep behaves. For example, the -M +option makes it possible to search for strings that span line boundaries. What +defines a line boundary is controlled by the -N (--newline) option. +The -h and -H options control whether or not file names are shown, +and the -Z option changes the file name terminator to a zero byte. +

+

+The amount of memory used for buffering files that are being scanned is +controlled by parameters that can be set by the --buffer-size and +--max-buffer-size options. The first of these sets the size of buffer +that is obtained at the start of processing. If an input file contains very +long lines, a larger buffer may be needed; this is handled by automatically +extending the buffer, up to the limit specified by --max-buffer-size. The +default values for these parameters can be set when pcre2grep is +built; if nothing is specified, the defaults are set to 20KiB and 1MiB +respectively. An error occurs if a line is too long and the buffer can no +longer be expanded. +

+

+The block of memory that is actually used is three times the "buffer size", to +allow for buffering "before" and "after" lines. If the buffer size is too +small, fewer than requested "before" and "after" lines may be output. +

+

+When matching with a multiline pattern, the size of the buffer must be at least +half of the maximum match expected or the pattern might fail to match. +

+

+Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater. +BUFSIZ is defined in <stdio.h>. When there is more than one pattern +(specified by the use of -e and/or -f), each pattern is applied to +each line in the order in which they are defined, except that all the -e +patterns are tried before the -f patterns. +

+

+By default, as soon as one pattern matches a line, no further patterns are +considered. However, if --colour (or --color) is used to colour the +matching substrings, or if --only-matching, --file-offsets, +--line-offsets, or --output is used to output only the part of the +line that matched (either shown literally, or as an offset), the behaviour is +different. In this situation, all the patterns are applied to the line. If +there is more than one match, the one that begins nearest to the start of the +subject is processed; if there is more than one match at that position, the one +with the longest matching substring is processed; if the matching substrings +are equal, the first match found is processed. +

+

+Scanning with all the patterns resumes immediately following the match, so that +later matches on the same line can be found. Note, however, that an overlapping +match that starts in the middle of another match will not be processed. +

+

+The above behaviour was changed at release 10.41 to be more compatible with GNU +grep. In earlier releases, pcre2grep did not recognize matches from +later patterns that were earlier in the subject. +

+

+Patterns that can match an empty string are accepted, but empty string +matches are never recognized. An example is the pattern "(super)?(man)?", in +which all components are optional. This pattern finds all occurrences of both +"super" and "man"; the output differs from matching with "super|man" when only +the matching substrings are being shown. +

+

+If the LC_ALL or LC_CTYPE environment variable is set, +pcre2grep uses the value to set a locale when calling the PCRE2 library. +The --locale option can be used to override this. +

+
SUPPORT FOR COMPRESSED FILES
+

+Compile-time options for pcre2grep can set it up to use libz or +libbz2 for reading compressed files whose names end in .gz or +.bz2, respectively. You can find out whether your pcre2grep binary +has support for one or both of these file types by running it with the +--help option. If the appropriate support is not present, all files are +treated as plain text. The standard input is always so treated. If a file with +a .gz or .bz2 extension is not in fact compressed, it is read as a +plain text file. When input is from a compressed .gz or .bz2 file, the +--line-buffered option is ignored. +

+
BINARY FILES
+

+By default, a file that contains a binary zero byte within the first 1024 bytes +is identified as a binary file, and is processed specially. However, if the +newline type is specified as NUL, that is, the line terminator is a binary +zero, the test for a binary file is not applied. See the --binary-files +option for a means of changing the way binary files are handled. +

+
BINARY ZEROS IN PATTERNS
+

+Patterns passed from the command line are strings that are terminated by a +binary zero, so cannot contain internal zeros. However, patterns that are read +from a file via the -f option may contain binary zeros. +

+
OPTIONS
+

+The order in which some of the options appear can affect the output. For +example, both the -H and -l options affect the printing of file +names. Whichever comes later in the command line will be the one that takes +effect. Similarly, except where noted below, if an option is given twice, the +later setting is used. Numerical values for options may be followed by K or M, +to signify multiplication by 1024 or 1024*1024 respectively. +

+

+-- +This terminates the list of options. It is useful if the next item on the +command line starts with a hyphen but is not an option. This allows for the +processing of patterns and file names that start with hyphens. +

+

+-A number, --after-context=number +Output up to number lines of context after each matching line. Fewer +lines are output if the next match or the end of the file is reached, or if the +processing buffer size has been set too small. If file names and/or line +numbers are being output, a hyphen separator is used instead of a colon for the +context lines (the -Z option can be used to change the file name +terminator to a zero byte). A line containing "--" is output between each group +of lines, unless they are in fact contiguous in the input file. The value of +number is expected to be relatively small. When -c is used, +-A is ignored. +

+

+-a, --text +Treat binary files as text. This is equivalent to +--binary-files=text. +

+

+--allow-lookaround-bsk +PCRE2 now forbids the use of \K in lookarounds by default, in line with Perl. +This option causes pcre2grep to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option, which enables this somewhat dangerous usage. +

+

+-B number, --before-context=number +Output up to number lines of context before each matching line. Fewer +lines are output if the previous match or the start of the file is within +number lines, or if the processing buffer size has been set too small. If +file names and/or line numbers are being output, a hyphen separator is used +instead of a colon for the context lines (the -Z option can be used to +change the file name terminator to a zero byte). A line containing "--" is +output between each group of lines, unless they are in fact contiguous in the +input file. The value of number is expected to be relatively small. When +-c is used, -B is ignored. +

+

+--binary-files=word +Specify how binary files are to be processed. If the word is "binary" (the +default), pattern matching is performed on binary files, but the only output is +"Binary file <name> matches" when a match succeeds. If the word is "text", +which is equivalent to the -a or --text option, binary files are +processed in the same way as any other file. In this case, when a match +succeeds, the output may be binary garbage, which can have nasty effects if +sent to a terminal. If the word is "without-match", which is equivalent to the +-I option, binary files are not processed at all; they are assumed not to +be of interest and are skipped without causing any output or affecting the +return code. +

+

+--buffer-size=number +Set the parameter that controls how much memory is obtained at the start of +processing for buffering files that are being scanned. See also +--max-buffer-size below. +

+

+-C number, --context=number +Output number lines of context both before and after each matching line. +This is equivalent to setting both -A and -B to the same value. +

+

+-c, --count +Do not output lines from the files that are being scanned; instead output the +number of lines that would have been shown, either because they matched, or, if +-v is set, because they failed to match. By default, this count is +exactly the same as the number of lines that would have been output, but if the +-M (multiline) option is used (without -v), there may be more +suppressed lines than the count (that is, the number of matches). +
+
+If no lines are selected, the number zero is output. If several files are +being scanned, a count is output for each of them and the -t option can +be used to cause a total to be output at the end. However, if the +--files-with-matches option is also used, only those files whose counts +are greater than zero are listed. When -c is used, the -A, +-B, and -C options are ignored. +

+

+--colour, --color +If this option is given without any data, it is equivalent to "--colour=auto". +If data is required, it must be given in the same shell item, separated by an +equals sign. +

+

+--colour=value, --color=value +This option specifies under what circumstances the parts of a line that matched +a pattern should be coloured in the output. It is ignored if +--file-offsets, --line-offsets, or --output is set. By +default, output is not coloured. The value for the --colour option (which +is optional, see above) may be "never", "always", or "auto". In the latter +case, colouring happens only if the standard output is connected to a terminal. +More resources are used when colouring is enabled, because pcre2grep has +to search for all possible matches in a line, not just one, in order to colour +them all. +
+
+The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +pcre2grep looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +
+
+If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red. +

+

+-D action, --devices=action +If an input path is not a regular file or a directory, "action" specifies how +it is to be processed. Valid values are "read" (the default) or "skip" +(silently skip the path). +

+

+-d action, --directories=action +If an input path is a directory, "action" specifies how it is to be processed. +Valid values are "read" (the default in non-Windows environments, for +compatibility with GNU grep), "recurse" (equivalent to the -r option), or +"skip" (silently skip the path, the default in Windows environments). In the +"read" case, directories are read as if they were ordinary files. In some +operating systems the effect of reading a directory like this is an immediate +end-of-file; in others it may provoke an error. +

+

+--depth-limit=number +See --match-limit below. +

+

+-E, --case-restrict +When case distinctions are being ignored in Unicode mode, two ASCII letters (K +and S) will by default match Unicode characters U+212A (Kelvin sign) and U+017F +(long S) respectively, as well as their lower case ASCII counterparts. When +this option is set, case equivalences are restricted such that no ASCII +character matches a non-ASCII character, and vice versa. +

+

+-e pattern, --regex=pattern, --regexp=pattern +Specify a pattern to be matched. This option can be used multiple times in +order to specify several patterns. It can also be used as a way of specifying a +single pattern that starts with a hyphen. When -e is used, no argument +pattern is taken from the command line; all arguments are treated as file +names. There is no limit to the number of patterns. They are applied to each +line in the order in which they are defined. +
+
+If -f is used with -e, the command line patterns are matched first, +followed by the patterns from the file(s), independent of the order in which +these options are specified. +

+

+--exclude=pattern +Files (but not directories) whose names match the pattern are skipped without +being processed. This applies to all files, whether listed on the command line, +obtained from --file-list, or by scanning a directory. The pattern is a +PCRE2 regular expression, and is matched against the final component of the +file name, not the entire path. The -F, -w, and -x options do +not apply to this pattern. The option may be given any number of times in order +to specify multiple patterns. If a file name matches both an --include +and an --exclude pattern, it is excluded. There is no short form for this +option. +

+

+--exclude-from=filename +Treat each non-empty line of the file as the data for an --exclude +option. What constitutes a newline when reading the file is the operating +system's default. The --newline option has no effect on this option. This +option may be given more than once in order to specify a number of files to +read. +

+

+--exclude-dir=pattern +Directories whose names match the pattern are skipped without being processed, +whatever the setting of the --recursive option. This applies to all +directories, whether listed on the command line, obtained from +--file-list, or by scanning a parent directory. The pattern is a PCRE2 +regular expression, and is matched against the final component of the directory +name, not the entire path. The -F, -w, and -x options do not +apply to this pattern. The option may be given any number of times in order to +specify more than one pattern. If a directory matches both --include-dir +and --exclude-dir, it is excluded. There is no short form for this +option. +

+

+-F, --fixed-strings +Interpret each data-matching pattern as a list of fixed strings, separated by +newlines, instead of as a regular expression. What constitutes a newline for +this purpose is controlled by the --newline option. The -w (match +as a word) and -x (match whole line) options can be used with -F. +They apply to each of the fixed strings. A line is selected if any of the fixed +strings are found in it (subject to -w or -x, if present). This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the --include or +--exclude options. +

+

+-f filename, --file=filename +Read patterns from the file, one per line. As is the case with patterns on the +command line, no delimiters should be used. What constitutes a newline when +reading the file is the operating system's default interpretation of \n. The +--newline option has no effect on this option. Trailing white space is +removed from each line, and blank lines are ignored unless the +--posix-pattern-file option is also provided. An empty file contains no +patterns and therefore matches nothing. Patterns read from a file in this way +may contain binary zeros, which are treated as ordinary character literals. +
+
+If this option is given more than once, all the specified files are read. A +data line is output if any of the patterns match it. A file name can be given +as "-" to refer to the standard input. When -f is used, patterns +specified on the command line using -e may also be present; they are +matched before the file's patterns. However, no pattern is taken from the +command line; all arguments are treated as the names of paths to be searched. +

+

+--file-list=filename +Read a list of files and/or directories that are to be scanned from the given +file, one per line. What constitutes a newline when reading the file is the +operating system's default. Trailing white space is removed from each line, and +blank lines are ignored. These paths are processed before any that are listed +on the command line. The file name can be given as "-" to refer to the standard +input. If --file and --file-list are both specified as "-", +patterns are read first. This is useful only when the standard input is a +terminal, from which further lines (the list of files) can be read after an +end-of-file indication. If this option is given more than once, all the +specified files are read. +

+

+--file-offsets +Instead of showing lines or parts of lines that match, show each match as an +offset from the start of the file and a length, separated by a comma. In this +mode, --colour has no effect, and no context is shown. That is, the +-A, -B, and -C options are ignored. If there is more than one +match in a line, each of them is shown separately. This option is mutually +exclusive with --output, --line-offsets, and --only-matching. +

+

+--group-separator=text +Output this text string instead of two hyphens between groups of lines when +-A, -B, or -C is in use. See also --no-group-separator. +

+

+-H, --with-filename +Force the inclusion of the file name at the start of output lines when +searching a single file. The file name is not normally shown in this case. +By default, for matching lines, the file name is followed by a colon; for +context lines, a hyphen separator is used. The -Z option can be used to +change the terminator to a zero byte. If a line number is also being output, +it follows the file name. When the -M option causes a pattern to match +more than one line, only the first is preceded by the file name. This option +overrides any previous -h, -l, or -L options. +

+

+-h, --no-filename +Suppress the output of file names when searching multiple files. File names are +normally shown when multiple files are searched. By default, for matching +lines, the file name is followed by a colon; for context lines, a hyphen +separator is used. The -Z option can be used to change the terminator to +a zero byte. If a line number is also being output, it follows the file name. +This option overrides any previous -H, -L, or -l options. +

+

+--heap-limit=number +See --match-limit below. +

+

+--help +Output a help message, giving brief details of the command options and file +type support, and then exit. Anything else on the command line is +ignored. +

+

+-I +Ignore binary files. This is equivalent to +--binary-files=without-match. +

+

+-i, --ignore-case +Ignore upper/lower case distinctions when pattern matching. This applies when +matching path names for inclusion or exclusion as well as when matching lines +in files. +

+

+--include=pattern +If any --include patterns are specified, the only files that are +processed are those whose names match one of the patterns and do not match an +--exclude pattern. This option does not affect directories, but it +applies to all files, whether listed on the command line, obtained from +--file-list, or by scanning a directory. The pattern is a PCRE2 regular +expression, and is matched against the final component of the file name, not +the entire path. The -F, -w, and -x options do not apply to +this pattern. The option may be given any number of times. If a file name +matches both an --include and an --exclude pattern, it is excluded. +There is no short form for this option. +

+

+--include-from=filename +Treat each non-empty line of the file as the data for an --include +option. What constitutes a newline for this purpose is the operating system's +default. The --newline option has no effect on this option. This option +may be given any number of times; all the files are read. +

+

+--include-dir=pattern +If any --include-dir patterns are specified, the only directories that +are processed are those whose names match one of the patterns and do not match +an --exclude-dir pattern. This applies to all directories, whether listed +on the command line, obtained from --file-list, or by scanning a parent +directory. The pattern is a PCRE2 regular expression, and is matched against +the final component of the directory name, not the entire path. The -F, +-w, and -x options do not apply to this pattern. The option may be +given any number of times. If a directory matches both --include-dir and +--exclude-dir, it is excluded. There is no short form for this option. +

+

+-L, --files-without-match +Instead of outputting lines from the files, just output the names of the files +that do not contain any lines that would have been output. Each file name is +output once, on a separate line by default, but if the -Z option is set, +they are separated by zero bytes instead of newlines. This option overrides any +previous -H, -h, or -l options. +

+

+-l, --files-with-matches +Instead of outputting lines from the files, just output the names of the files +containing lines that would have been output. Each file name is output once, on +a separate line, but if the -Z option is set, they are separated by zero +bytes instead of newlines. Searching normally stops as soon as a matching line +is found in a file. However, if the -c (count) option is also used, +matching continues in order to obtain the correct count, and those files that +have at least one match are listed along with their counts. Using this option +with -c is a way of suppressing the listing of files with no matches that +occurs with -c on its own. This option overrides any previous -H, +-h, or -L options. +

+

+--label=name +This option supplies a name to be used for the standard input when file names +are being output. If not supplied, "(standard input)" is used. There is no +short form for this option. +

+

+--line-buffered +When this option is given, non-compressed input is read and processed line by +line, and the output is flushed after each write. By default, input is read in +large chunks, unless pcre2grep can determine that it is reading from a +terminal, which is currently possible only in Unix-like environments or +Windows. Output to a terminal is normally automatically flushed by the operating +system. This option can be useful when the input or output is attached to a +pipe and you do not want pcre2grep to buffer up large amounts of data. +However, its use will affect performance, and the -M (multiline) option +ceases to work. When input is from a compressed .gz or .bz2 file, +--line-buffered is ignored. +

+

+--line-offsets +Instead of showing lines or parts of lines that match, show each match as a +line number, the offset from the start of the line, and a length. The line +number is terminated by a colon (as usual; see the -n option), and the +offset and length are separated by a comma. In this mode, --colour has no +effect, and no context is shown. That is, the -A, -B, and -C +options are ignored. If there is more than one match in a line, each of them is +shown separately. This option is mutually exclusive with --output, +--file-offsets, and --only-matching. +

+

+--locale=locale-name +This option specifies a locale to be used for pattern matching. It overrides +the value in the LC_ALL or LC_CTYPE environment variables. If no +locale is specified, the PCRE2 library's default (usually the "C" locale) is +used. There is no short form for this option. +

+

+-M, --multiline +Allow patterns to match more than one line. When this option is set, the PCRE2 +library is called in "multiline" mode, and a match is allowed to continue past +the end of the initial line and onto one or more subsequent lines. +
+
+Patterns used with -M may usefully contain literal newline characters and +internal occurrences of ^ and $ characters, because in multiline mode these can +match at internal newlines. Because pcre2grep is scanning multiple lines, +the \Z and \z assertions match only at the end of the last line in the file. +The \A assertion matches at the start of the first line of a match. This can +be any line in the file; it is not anchored to the first line. +
+
+The output for a successful match may consist of more than one line. The first +line is the line in which the match started, and the last line is the line in +which the match ended. If the matched string ends with a newline sequence, the +output ends at the end of that line. If -v is set, none of the lines in a +multi-line match are output. Once a match has been handled, scanning restarts +at the beginning of the line after the one in which the match ended. +
+
+The newline sequence that separates multiple lines must be matched as part of +the pattern. For example, to find the phrase "regular expression" in a file +where "regular" might be at the end of a line and "expression" at the start of +the next line, you could use this command: +

+  pcre2grep -M 'regular\s+expression' <file>
+
+The \s escape sequence matches any white space character, including newlines, +and is followed by + so as to match trailing white space on the first line as +well as possibly handling a two-character newline sequence. +
+
+There is a limit to the number of lines that can be matched, imposed by the way +that pcre2grep buffers the input file as it scans it. With a sufficiently +large processing buffer, this should not be a problem. +
+
+The -M option does not work when input is read line by line (see +--line-buffered). +

+

+-m number, --max-count=number +Stop processing after finding number matching lines, or non-matching +lines if -v is also set. Any trailing context lines are output after the +final match. In multiline mode, each multiline match counts as just one line +for this purpose. If this limit is reached when reading the standard input from +a regular file, the file is left positioned just after the last matching line. +If -c is also set, the count that is output is never greater than +number. This option has no effect if used with -L, -l, or +-q, or when just checking for a match in a binary file. +

+

+--match-limit=number +Processing some regular expression patterns may take a very long time to search +for all possible matching strings. Others may require a very large amount of +memory. There are three options that set resource limits for matching. +
+
+The --match-limit option provides a means of limiting computing resource +usage when processing patterns that are not going to match, but which have a +very large number of possibilities in their search trees. The classic example +is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a +counter that is incremented each time around its main processing loop. If the +value set by --match-limit is reached, an error occurs. +
+
+The --heap-limit option specifies, as a number of kibibytes (units of +1024 bytes), the maximum amount of heap memory that may be used for matching. +
+
+The --depth-limit option limits the depth of nested backtracking points, +which indirectly limits the amount of memory that is used. The amount of memory +needed for each backtracking point depends on the number of capturing +parentheses in the pattern, so the amount of memory that is used before this +limit acts varies from pattern to pattern. This limit is of use only if it is +set smaller than --match-limit. +
+
+There are no short forms for these options. The default limits can be set +when the PCRE2 library is compiled; if they are not specified, the defaults +are very large and so effectively unlimited. +

+

+--max-buffer-size=number +This limits the expansion of the processing buffer, whose initial size can be +set by --buffer-size. The maximum buffer size is silently forced to be no +smaller than the starting buffer size. +

+

+-N newline-type, --newline=newline-type +Six different conventions for indicating the ends of lines in scanned files are +supported. For example: +

+  pcre2grep -N CRLF 'some pattern' <file>
+
+The newline type may be specified in upper, lower, or mixed case. If the +newline type is NUL, lines are separated by binary zero characters. The other +types are the single-character sequences CR (carriage return) and LF +(linefeed), the two-character sequence CRLF, an "anycrlf" type, which +recognizes any of the preceding three types, and an "any" type, for which any +Unicode line ending sequence is assumed to end a line. The Unicode sequences +are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +
+
+When the PCRE2 library is built, a default line-ending sequence is specified. +This is normally the standard sequence for the operating system. Unless +otherwise specified by this option, pcre2grep uses the library's default. +
+
+This option makes it possible to use pcre2grep to scan files that have +come from other environments without having to modify their line endings. If +the data that is being scanned does not agree with the convention set by this +option, pcre2grep may behave in strange ways. Note that this option does +not apply to files specified by the -f, --exclude-from, or +--include-from options, which are expected to use the operating system's +standard newline sequence. +

+

+-n, --line-number +Precede each output line by its line number in the file, followed by a colon +for matching lines or a hyphen for context lines. If the file name is also +being output, it precedes the line number. When the -M option causes a +pattern to match more than one line, only the first is preceded by its line +number. This option is forced if --line-offsets is used. +

+

+--no-group-separator +Do not output a separator between groups of lines when -A, -B, or +-C is in use. The default is to output a line containing two hyphens. See +also --group-separator. +

+

+--no-jit +If the PCRE2 library is built with support for just-in-time compiling (which +speeds up matching), pcre2grep automatically makes use of this, unless it +was explicitly disabled at build time. This option can be used to disable the +use of JIT at run time. It is provided for testing and working around problems. +It should never be needed in normal use. +

+

+-O text, --output=text +When there is a match, instead of outputting the line that matched, output just +the text specified in this option, followed by an operating-system standard +newline. In this mode, --colour has no effect, and no context is shown. +That is, the -A, -B, and -C options are ignored. The +--newline option has no effect on this option, which is mutually +exclusive with --only-matching, --file-offsets, and +--line-offsets. However, like --only-matching, if there is more +than one match in a line, each of them causes a line of output. +
+
+Escape sequences starting with a dollar character may be used to insert the +contents of the matched part of the line and/or captured substrings into the +text. +
+
+$<digits> or ${<digits>} is replaced by the captured substring of the given +decimal number; $& (or the legacy $0) substitutes the whole match. If the +number is greater than the number of capturing substrings, or if the capture +is unset, the replacement is empty. +
+
+$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by +newline; $r by carriage return; $t by tab; $v by vertical tab. +
+
+$o<digits> or $o{<digits>} is replaced by the character whose code point is the +given octal number. In the first form, up to three octal digits are processed. +When more digits are needed in Unicode mode to specify a wide character, the +second form must be used. +
+
+$x<digits> or $x{<digits>} is replaced by the character represented by the +given hexadecimal number. In the first form, up to two hexadecimal digits are +processed. When more digits are needed in Unicode mode to specify a wide +character, the second form must be used. +
+
+Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar. +

+

+-o, --only-matching +Show only the part of the line that matched a pattern instead of the whole +line. In this mode, no context is shown. That is, the -A, -B, and +-C options are ignored. If there is more than one match in a line, each +of them is shown separately, on a separate line of output. If -o is +combined with -v (invert the sense of the match to find non-matching +lines), no output is generated, but the return code is set appropriately. If +the matched portion of the line is empty, nothing is output unless the file +name or line number are being printed, in which case they are shown on an +otherwise empty line. This option is mutually exclusive with --output, +--file-offsets and --line-offsets. +

+

+-onumber, --only-matching=number +Show only the part of the line that matched the capturing parentheses of the +given number. Up to 50 capturing parentheses are supported by default. This +limit can be changed via the --om-capture option. A pattern may contain +any number of capturing parentheses, but only those whose number is within the +limit can be accessed by -o. An error occurs if the number specified by +-o is greater than the limit. +
+
+-o0 is the same as -o without a number. Because these options can be +given without an argument (see above), if an argument is present, it must be +given in the same shell item, for example, -o3 or --only-matching=2. The +comments given for the non-argument case above also apply to this option. If +the specified capturing parentheses do not exist in the pattern, or were not +set in the match, nothing is output unless the file name or line number are +being output. +
+
+If this option is given multiple times, multiple substrings are output for each +match, in the order the options are given, and all on one line. For example, +-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and +then 3 again to be output. By default, there is no separator (but see the next +but one option). +

+

+--om-capture=number +Set the number of capturing parentheses that can be accessed by -o. The +default is 50. +

+

+--om-separator=text +Specify a separating string for multiple occurrences of -o. The default +is an empty string. Separating strings are never coloured. +

+

+-P, --no-ucp +Starting from release 10.43, when UTF/Unicode mode is specified with -u +or -U, the PCRE2_UCP option is used by default. This means that the +POSIX classes in patterns match more than just ASCII characters. For example, +[:digit:] matches any Unicode decimal digit. The --no-ucp option +suppresses PCRE2_UCP, thus restricting the POSIX classes to ASCII characters, +as was the case in earlier releases. Note that there are now more fine-grained +option settings within patterns that affect individual classes. For example, +when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while +allowing \w to match Unicode letters and digits. +

+

+--posix-pattern-file +When patterns are provided with the -f option, do not trim trailing +spaces or ignore empty lines in a similar way to other grep tools. To keep +the behaviour consistent with older versions, if the pattern read was +terminated with CRLF (as character literals) then both characters won't be +included as part of it, so if you really need to have a pattern ending in '\r', +use an escape sequence or provide it by a different method. +

+

+-q, --quiet +Work quietly, that is, display nothing except error messages. The exit +status indicates whether or not any matches were found. +

+

+-r, --recursive +If any given path is a directory, recursively scan the files it contains, +taking note of any --include and --exclude settings. By default, a +directory is read as a normal file; in some operating systems this gives an +immediate end-of-file. This option is a shorthand for setting the -d +option to "recurse". +

+

+--recursion-limit=number +This is an obsolete synonym for --depth-limit. See --match-limit +above for details. +

+

+-s, --no-messages +Suppress error messages about non-existent or unreadable files. Such files are +quietly skipped. However, the return code is still 2, even if matches were +found in other files. +

+

+-t, --total-count +This option is useful when scanning more than one file. If used on its own, +-t suppresses all output except for a grand total number of matching +lines (or non-matching lines if -v is used) in all the files. If -t +is used with -c, a grand total is output except when the previous output +is just one line. In other words, it is not output when just one file's count +is listed. If file names are being output, the grand total is preceded by +"TOTAL:". Otherwise, it appears as just another number. The -t option is +ignored when used with -L (list files without matches), because the grand +total would always be zero. +

+

+-u, --utf +Operate in UTF/Unicode mode. This option is available only if PCRE2 has been +compiled with UTF-8 support. All patterns (including those for any +--exclude and --include options) and all lines that are scanned +must be valid strings of UTF-8 characters. If an invalid UTF-8 string is +encountered, an error occurs. +

+

+-U, --utf-allow-invalid +As --utf, but in addition subject lines may contain invalid UTF-8 code +unit sequences. These can never form part of any pattern match. Patterns +themselves, however, must still be valid UTF-8 strings. This facility allows +valid UTF-8 strings to be sought within arbitrary byte sequences in executable +or other binary files. For more details about matching in non-valid UTF-8 +strings, see the +pcre2unicode(3) +documentation. +

+

+-V, --version +Write the version numbers of pcre2grep and the PCRE2 library to the +standard output and then exit. Anything else on the command line is +ignored. +

+

+-v, --invert-match +Invert the sense of the match, so that lines which do not match any of +the patterns are the ones that are found. When this option is set, options such +as --only-matching and --output, which specify parts of a match +that are to be output, are ignored. +

+

+-w, --word-regex, --word-regexp +Force the patterns only to match "words". That is, there must be a word +boundary at the start and end of each matched string. This is equivalent to +having "\b(?:" at the start of each pattern, and ")\b" at the end. This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the --include or +--exclude options. +

+

+-x, --line-regex, --line-regexp +Force the patterns to start matching only at the beginnings of lines, and in +addition, require them to match entire lines. In multiline mode the match may +be more than one line. This is equivalent to having "^(?:" at the start of each +pattern and ")$" at the end. This option applies only to the patterns that are +matched against the contents of files; it does not apply to patterns specified +by any of the --include or --exclude options. +

+

+-Z, --null +Terminate file names in the regular output with a zero byte (the NUL +character) instead of what would normally appear. This is useful when file +names contain unusual characters such as colons, hyphens, or even newlines. The +option does not apply to file names in error messages. +

+
ENVIRONMENT VARIABLES
+

+The environment variables LC_ALL and LC_CTYPE are examined, in that +order, for a locale. The first one that is set is used. This can be overridden +by the --locale option. If no locale is set, the PCRE2 library's default +(usually the "C" locale) is used. +

+
NEWLINES
+

+The -N (--newline) option allows pcre2grep to scan files with +newline conventions that differ from the default. This option affects only the +way scanned files are processed. It does not affect the interpretation of files +specified by the -f, --file-list, --exclude-from, or +--include-from options. +

+

+Any parts of the scanned input files that are written to the standard output +are copied with whatever newline sequences they have in the input. However, if +the final line of a file is output, and it does not end with a newline +sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF +or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a +single NL is used. +

+

+The newline setting does not affect the way in which pcre2grep writes +newlines in informational messages to the standard output and error streams. +Under Windows, the standard output is set to be binary, so that "\r\n" at the +ends of output lines that are copied from the input is not converted to +"\r\r\n" by the C I/O library. This means that any messages written to the +standard output must end with "\r\n". For all other operating systems, and +for all messages to the standard error stream, "\n" is used. +

+
OPTIONS COMPATIBILITY WITH GNU GREP
+

+Many of the short and long forms of pcre2grep's options are the same as +in the GNU grep program. Any long option of the form --xxx-regexp +(GNU terminology) is also available as --xxx-regex (PCRE2 terminology). +However, the --case-restrict, --depth-limit, -E, +--file-list, --file-offsets, --heap-limit, +--include-dir, --line-offsets, --locale, --match-limit, +-M, --multiline, -N, --newline, --no-ucp, +--om-separator, --output, -P, -u, --utf, +-U, and --utf-allow-invalid options are specific to +pcre2grep, as is the use of the --only-matching option with a +capturing parentheses number. +

+

+Although most of the common options work the same way, a few are different in +pcre2grep. For example, the --include option's argument is a glob +for GNU grep, but in pcre2grep it is a regular expression to which +the -i option applies. If both the -c and -l options are +given, GNU grep lists only file names, without counts, but pcre2grep +gives the counts as well. +

+
OPTIONS WITH DATA
+

+There are four different ways in which an option with data can be specified. +If a short form option is used, the data may follow immediately, or (with one +exception) in the next command line item. For example: +

+  -f/some/file
+  -f /some/file
+
+The exception is the -o option, which may appear with or without data. +Because of this, if data is present, it must follow immediately in the same +item, for example -o3. +

+

+If a long form option is used, the data may appear in the same command line +item, separated by an equals character, or (with two exceptions) it may appear +in the next command line item. For example: +

+  --file=/some/file
+  --file /some/file
+
+Note, however, that if you want to supply a file name beginning with ~ as data +in a shell command, and have the shell expand ~ to a home directory, you must +separate the file name from the option, because the shell does not treat ~ +specially unless it is at the start of an item. +

+

+The exceptions to the above are the --colour (or --color) and +--only-matching options, for which the data is optional. If one of these +options does have data, it must be given in the first form, using an equals +character. Otherwise pcre2grep will assume that it has no data. +

+
USING PCRE2'S CALLOUT FACILITY
+

+pcre2grep has, by default, support for calling external programs or +scripts or echoing specific strings during matching by making use of PCRE2's +callout facility. However, this support can be completely or partially disabled +when pcre2grep is built. You can find out whether your binary has support +for callouts by running it with the --help option. If callout support is +completely disabled, callouts in patterns are forbidden by pcre2grep. +If the facility is partially disabled, calling external programs is not +supported, and callouts that request it are ignored. +

+

+A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is +either a number or a quoted string (see the +pcre2callout +documentation for details). Numbered callouts are ignored by pcre2grep; +only callouts with string arguments are useful. +

+
+Echoing a specific string +
+

+Starting the callout string with a pipe character invokes an echoing facility +that avoids calling an external program or script. This facility is always +available, provided that callouts were not completely disabled when +pcre2grep was built. The rest of the callout string is processed as a +zero-terminated string, which means it should not contain any internal binary +zeros. It is written to the output, having first been passed through the same +escape processing as text from the --output (-O) option (see +above). However, $0 or $& cannot be used to insert a matched substring because +the match is still in progress. Instead, the single character '0' is inserted. +Any syntax errors in the string (for example, a dollar not followed by another +character) cause the callout to be ignored. No terminator is added to the +output string, so if you want a newline, you must include it explicitly using +the escape $n. For example: +

+  pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
+
+Matching continues normally after the string is output. If you want to see only +the callout output but not any output from an actual match, you should end the +pattern with (*FAIL). +

+
+Calling external programs or scripts +
+

+This facility can be independently disabled when pcre2grep is built. It +is supported for Windows, where a call to _spawnvp() is used, for VMS, +where lib$spawn() is used, and for any Unix-like environment where +fork() and execv() are available. +

+

+If the callout string does not start with a pipe (vertical bar) character, it +is parsed into a list of substrings separated by pipe characters. The first +substring must be an executable name, with the following substrings specifying +arguments: +

+  executable_name|arg1|arg2|...
+
+Any substring (including the executable name) may contain escape sequences +started by a dollar character. These are the same as for the --output +(-O) option documented above, except that $0 or $& cannot insert the +matched string because the match is still in progress. Instead, the character +'0' is inserted. If you need a literal dollar or pipe character in any +substring, use $$ or $| respectively. Here is an example: +
+  echo -e "abcde\n12345" | pcre2grep \
+    '(?x)(.)(..(.))
+    (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
+
+  Output:
+
+    Arg1: [a] [bcd] [d] Arg2: |a| ()
+    abcde
+    Arg1: [1] [234] [4] Arg2: |1| ()
+    12345
+
+The parameters for the system call that is used to run the program or script +are zero-terminated strings. This means that binary zero characters in the +callout argument will cause premature termination of their substrings, and +therefore should not be present. Any syntax error in the string (for example, +a dollar not followed by another character) causes the callout to be ignored. +If running the program fails for any reason (including the non-existence of the +executable), a local matching failure occurs and the matcher backtracks in the +normal way. +

+
MATCHING ERRORS
+

+It is possible to supply a regular expression that takes a very long time to +fail to match certain lines. Such patterns normally involve nested indefinite +repeats, for example: (a+)*\d when matched against a line of a's with no final +digit. The PCRE2 matching function has a resource limit that causes it to abort +in these circumstances. If this happens, pcre2grep outputs an error +message and the line that caused the problem to the standard error stream. If +there are more than 20 such errors, pcre2grep gives up. +

+

+The --match-limit option of pcre2grep can be used to set the +overall resource limit. There are also other limits that affect the amount of +memory used during matching; see the discussion of --heap-limit and +--depth-limit above. +

+
DIAGNOSTICS
+

+Exit status is 0 if any matches were found, 1 if no matches were found, and 2 +for syntax errors, overlong lines, non-existent or inaccessible files (even if +matches were found in other files) or too many matching errors. Using the +-s option to suppress error messages about inaccessible files does not +affect the return code. +

+

+When run under VMS, the return code is placed in the symbol PCRE2GREP_RC +because VMS does not distinguish between exit(0) and exit(1). +

+
SEE ALSO
+

+pcre2pattern(3), pcre2syntax(3), pcre2callout(3), +pcre2unicode(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 04 February 2025 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2jit.html b/3rd/pcre2/doc/html/pcre2jit.html new file mode 100644 index 00000000..6835cd88 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2jit.html @@ -0,0 +1,505 @@ + + +pcre2jit specification + + +

pcre2jit man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 JUST-IN-TIME COMPILER SUPPORT
+

+Just-in-time compiling is a heavyweight optimization that can greatly speed up +pattern matching. However, it comes at the cost of extra processing before the +match is performed, so it is of most benefit when the same pattern is going to +be matched many times. This does not necessarily mean many calls of a matching +function; if the pattern is not anchored, matching attempts may take place many +times at various positions in the subject, even for a single call. Therefore, +if the subject string is very long, it may still pay to use JIT even for +one-off matches. JIT support is available for all of the 8-bit, 16-bit and +32-bit PCRE2 libraries. +

+

+JIT support applies only to the traditional Perl-compatible matching function. +It does not apply when the DFA matching function is being used. The code for +JIT support was written by Zoltan Herczeg. +

+
AVAILABILITY OF JIT SUPPORT
+

+JIT support is an optional feature of PCRE2. The "configure" option +--enable-jit (or equivalent CMake option) must be set when PCRE2 is built if +you want to use JIT. The support is limited to the following hardware +platforms: +

+  ARM 32-bit (v7, and Thumb2)
+  ARM 64-bit
+  IBM s390x 64 bit
+  Intel x86 32-bit and 64-bit
+  LoongArch 64 bit
+  MIPS 32-bit and 64-bit
+  Power PC 32-bit and 64-bit
+  RISC-V 32-bit and 64-bit
+
+If --enable-jit is set on an unsupported platform, compilation fails. +

+

+A client program can tell if JIT support has been compiled by calling +pcre2_config() with the PCRE2_CONFIG_JIT option. The result is one if +PCRE2 was built with JIT support, and zero otherwise. However, having the JIT +code available does not guarantee that it will be used for any particular +match. One reason for this is that there are a number of options and pattern +items that are +not supported by JIT +(see below). Another reason is that in some environments JIT is unable to get +executable memory in which to build its compiled code. The only guarantee from +pcre2_config() is that if it returns zero, JIT will definitely not +be used. +

+

+As of release 10.45 there is a more informative way to test for JIT support. If +pcre2_jit_compile() is called with the single option PCRE2_JIT_TEST_ALLOC +it returns zero if JIT is available and has a working allocator. Otherwise it +returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate executable +memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not compiled. The +code argument is ignored, so it can be a NULL value. +

+

+A simple program does not need to check availability in order to use JIT when +possible. The API is implemented in a way that falls back to the interpretive +code if JIT is not available or cannot be used for a given match. For programs +that need the best possible performance, there is a +"fast path" +API that is JIT-specific. +

+
SIMPLE USE OF JIT
+

+To make use of the JIT support in the simplest way, all you have to do is to +call pcre2_jit_compile() after successfully compiling a pattern with +pcre2_compile(). This function has two arguments: the first is the +compiled pattern pointer that was returned by pcre2_compile(), and the +second is zero or more of the following option bits: PCRE2_JIT_COMPLETE, +PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. +

+

+If JIT support is not available, a call to pcre2_jit_compile() does +nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern +is passed to the JIT compiler, which turns it into machine code that executes +much faster than the normal interpretive code, but yields exactly the same +results. The returned value from pcre2_jit_compile() is zero on success, +or a negative error code. +

+

+There is a limit to the size of pattern that JIT supports, imposed by the size +of machine stack that it uses. The exact rules are not documented because they +may change at any time, in particular, when new optimizations are introduced. +If a pattern is too big, a call to pcre2_jit_compile() returns +PCRE2_ERROR_NOMEMORY. +

+

+PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete +matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or +PCRE2_PARTIAL_SOFT options of pcre2_match(), you should set one or both +of the other options as well as, or instead of PCRE2_JIT_COMPLETE. The JIT +compiler generates different optimized code for each of the three modes +(normal, soft partial, hard partial). When pcre2_match() is called, the +appropriate code is run if it is available. Otherwise, the pattern is matched +using interpretive code. +

+

+You can call pcre2_jit_compile() multiple times for the same compiled +pattern. It does nothing if it has previously compiled code for any of the +option bits. For example, you can call it once with PCRE2_JIT_COMPLETE and +(perhaps later, when you find you need partial matching) again with +PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore +PCRE2_JIT_COMPLETE and just compile code for partial matching. If +pcre2_jit_compile() is called with no option bits set, it immediately +returns zero. This is an alternative way of testing whether JIT support has +been compiled. +

+

+At present, it is not possible to free JIT compiled code except when the entire +compiled pattern is freed by calling pcre2_code_free(). +

+

+In some circumstances you may need to call additional functions. These are +described in the section entitled +"Controlling the JIT stack" +below. +

+

+There are some pcre2_match() options that are not supported by JIT, and +there are also some pattern items that JIT cannot handle. Details are given +below. +In both cases, matching automatically falls back to the interpretive code. If +you want to know whether JIT was actually used for a particular match, you +should arrange for a JIT callback function to be set up as described in the +section entitled +"Controlling the JIT stack" +below, even if you do not need to supply a non-default JIT stack. Such a +callback function is called whenever JIT code is about to be obeyed. If the +match-time options are not right for JIT execution, the callback function is +not obeyed. +

+

+If the JIT compiler finds an unsupported item, no JIT data is generated. You +can find out if JIT compilation was successful for a compiled pattern by +calling pcre2_pattern_info() with the PCRE2_INFO_JITSIZE option. A +non-zero result means that JIT compilation was successful. A result of 0 means +that JIT support is not available, or the pattern was not processed by +pcre2_jit_compile(), or the JIT compiler was not able to handle the +pattern. Successful JIT compilation does not, however, guarantee the use of JIT +at match time because there are some match time options that are not supported +by JIT. +

+
MATCHING SUBJECTS CONTAINING INVALID UTF
+

+When a pattern is compiled with the PCRE2_UTF option, subject strings are +normally expected to be a valid sequence of UTF code units. By default, this is +checked at the start of matching and an error is generated if invalid UTF is +detected. The PCRE2_NO_UTF_CHECK option can be passed to pcre2_match() to +skip the check (for improved performance) if you are sure that a subject string +is valid. If this option is used with an invalid string, the result is +undefined. The calling program may crash or loop or otherwise misbehave. +

+

+However, a way of running matches on strings that may contain invalid UTF +sequences is available. Calling pcre2_compile() with the +PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in +pcre2_match() to support invalid UTF, and, if pcre2_jit_compile() +is subsequently called, the compiled JIT code also supports invalid UTF. +Details of how this support works, in both the JIT and the interpretive cases, +are given in the +pcre2unicode +documentation. +

+

+There is also an obsolete option for pcre2_jit_compile() called +PCRE2_JIT_INVALID_UTF, which currently exists only for backward compatibility. +It is superseded by the pcre2_compile() option PCRE2_MATCH_INVALID_UTF +and should no longer be used. It may be removed in future. +

+
UNSUPPORTED OPTIONS AND PATTERN ITEMS
+

+The pcre2_match() options that are supported for JIT matching are +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and +PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not +supported at match time. +

+

+If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the +use of JIT, forcing matching by the interpreter code. +

+

+The only unsupported pattern items are \C (match a single data unit) when +running in a UTF mode, and a callout immediately before an assertion condition +in a conditional group. +

+
RETURN VALUES FROM JIT MATCHING
+

+When a pattern is matched using JIT, the return values are the same as those +given by the interpretive pcre2_match() code, with the addition of one +new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory used for +the JIT stack was insufficient. See +"Controlling the JIT stack" +below for a discussion of JIT stack usage. +

+

+The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if searching +a very large pattern tree goes on for too long, as it is in the same +circumstance when JIT is not used, but the details of exactly what is counted +are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned +when JIT matching is used. +

+
CONTROLLING THE JIT STACK
+

+When the compiled JIT code runs, it needs a block of memory to use as a stack. +By default, it uses 32KiB on the machine stack. However, some large or +complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT +is given when there is not enough stack. Three functions are provided for +managing blocks of memory for use as JIT stacks. There is further discussion +about the use of JIT stacks in the section entitled +"JIT stack FAQ" +below. +

+

+The pcre2_jit_stack_create() function creates a JIT stack. Its arguments +are a starting size, a maximum size, and a general context (for memory +allocation functions, or NULL for standard memory allocation). It returns a +pointer to an opaque structure of type pcre2_jit_stack, or NULL if there +is an error. The pcre2_jit_stack_free() function is used to free a stack +that is no longer needed. If its argument is NULL, this function returns +immediately, without doing anything. (For the technically minded: the address +space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to +1MiB should be more than enough for any pattern. +

+

+The pcre2_jit_stack_assign() function specifies which stack JIT code +should use. Its arguments are as follows: +

+  pcre2_match_context  *mcontext
+  pcre2_jit_callback    callback
+  void                 *data
+
+The first argument is a pointer to a match context. When this is subsequently +passed to a matching function, its information determines which JIT stack is +used. If this argument is NULL, the function returns immediately, without doing +anything. There are three cases for the values of the other two options: +
+  (1) If callback is NULL and data is NULL, an internal 32KiB block
+      on the machine stack is used. This is the default when a match
+      context is created.
+
+  (2) If callback is NULL and data is not NULL, data must be
+      a pointer to a valid JIT stack, the result of calling
+      pcre2_jit_stack_create().
+
+  (3) If callback is not NULL, it must point to a function that is
+      called with data as an argument at the start of matching, in
+      order to set up a JIT stack. If the return from the callback
+      function is NULL, the internal 32KiB stack is used; otherwise the
+      return value must be a valid JIT stack, the result of calling
+      pcre2_jit_stack_create().
+
+A callback function is obeyed whenever JIT code is about to be run; it is not +obeyed when pcre2_match() is called with options that are incompatible +for JIT matching. A callback function can therefore be used to determine +whether a match operation was executed by JIT or by the interpreter. +

+

+You may safely use the same JIT stack for more than one pattern (either by +assigning directly or by callback), as long as the patterns are matched +sequentially in the same thread. Currently, the only way to set up +non-sequential matches in one thread is to use callouts: if a callout function +starts another match, that match must use a different JIT stack to the one used +for currently suspended match(es). +

+

+In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. +

+

+Strictly speaking, even more is allowed. You can assign the same non-NULL stack +to a match context that is used by any number of patterns, as long as they are +not used for matching by multiple threads at the same time. For example, you +could use the same stack in all compiled patterns, with a global mutex in the +callback to wait until the stack is available for use. However, this is an +inefficient solution, and not recommended. +

+

+This is a suggestion for how a multithreaded program that needs to set up +non-default JIT stacks might operate: +

+  During thread initialization
+    thread_local_var = pcre2_jit_stack_create(...)
+
+  During thread exit
+    pcre2_jit_stack_free(thread_local_var)
+
+  Use a one-line callback function
+    return thread_local_var
+
+All the functions described in this section do nothing if JIT is not available. +

+
JIT STACK FAQ
+

+(1) Why do we need JIT stacks? +
+
+PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack where +the local data of the current node is pushed before checking its child nodes. +Allocating real machine stack on some platforms is difficult. For example, the +stack chain needs to be updated every time we extend the stack on PowerPC. +Although this is possible, the overhead of updating it decreases performance. So +we do the recursion in memory. +

+

+(2) Why don't we simply allocate blocks of memory with malloc()? +
+
+Modern operating systems have a nice feature: they can reserve an address space +instead of allocating memory. We can safely allocate memory pages inside this +address space, so the stack could grow without moving memory data (this is +important because of pointers). Thus we can allocate 1MiB address space, and +use only a single memory page (usually 4KiB) if that is enough. However, we can +still grow up to 1MiB anytime if needed. +

+

+(3) Who "owns" a JIT stack? +
+
+The owner of the stack is the user program, not the JIT studied pattern or +anything else. The user program must ensure that if a stack is being used by +pcre2_match(), (that is, it is assigned to a match context that is passed +to the pattern currently running), that stack must not be used by any other +threads (to avoid overwriting the same memory area). The best practice for +multithreaded programs is to allocate a stack for each thread, and return this +stack through the JIT callback function. +

+

+(4) When should a JIT stack be freed? +
+
+You can free a JIT stack at any time, as long as it will not be used by +pcre2_match() again. When you assign the stack to a match context, only a +pointer is set. There is no reference counting or any other magic. You can free +compiled patterns, contexts, and stacks in any order, anytime. +Just do not call pcre2_match() with a match context pointing to an +already freed stack, as that will cause a SEGFAULT. (Also, do not free a stack +currently used by pcre2_match() in another thread.) You can also replace +the stack in a context at any time when it is not in use. You should free the +previous stack before assigning a replacement. +

+

+(5) Should I allocate/free a stack every time before/after calling +pcre2_match()? +
+
+No, because this is too costly in terms of resources. However, you could +implement some clever scheme that releases the stack if it has not been used for, let's +say, two minutes. The JIT callback can help to achieve this without keeping a +list of patterns. +

+

+(6) OK, the stack is for long term memory allocation. But what happens if a +pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the +stack is freed? +
+
+Especially on embedded systems, it might be a good idea to release memory +sometimes without freeing the stack. There is no API for this at the moment. +Probably a function call which returns with the currently allocated memory for +any stack and another which allows releasing memory (shrinking the stack) would +be a good idea if someone needs this. +

+

+(7) This is too much of a headache. Isn't there any better solution for JIT +stack handling? +
+
+No, thanks to Windows. If POSIX threads were used everywhere, we could throw +out this complicated API. +

+
FREEING JIT SPECULATIVE MEMORY
+

+void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); +

+

+The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve +allocation speed. However, in low memory conditions, it might be better to free +all possible memory. You can cause this to happen by calling +pcre2_jit_free_unused_memory(). Its argument is a general context, for custom +memory management, or NULL for standard memory management. +

+
EXAMPLE CODE
+

+This is a single-threaded example that specifies a JIT stack without using a +callback. A real program should include error checking after all the function +calls. +

+  int rc;
+  pcre2_code *re;
+  pcre2_match_data *match_data;
+  pcre2_match_context *mcontext;
+  pcre2_jit_stack *jit_stack;
+
+  re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
+    &errornumber, &erroffset, NULL);
+  rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
+  mcontext = pcre2_match_context_create(NULL);
+  jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL);
+  pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
+  match_data = pcre2_match_data_create(re, 10);
+  rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext);
+  /* Process result */
+
+  pcre2_code_free(re);
+  pcre2_match_data_free(match_data);
+  pcre2_match_context_free(mcontext);
+  pcre2_jit_stack_free(jit_stack);
+
+
+

+
JIT FAST PATH API
+

+Because the API described above falls back to interpreted matching when JIT is +not available, it is convenient for programs that are written for general use +in many environments. However, calling JIT via pcre2_match() does have a +performance impact. Programs that are written for use where JIT is known to be +available, and which need the best possible performance, can instead use a +"fast path" API to call JIT matching directly instead of calling +pcre2_match() (obviously only for patterns that have been successfully +processed by pcre2_jit_compile()). +

+

+The fast path function is called pcre2_jit_match(), and it takes exactly +the same arguments as pcre2_match(). However, the subject string must be +specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported +option bits (for example, PCRE2_ANCHORED and PCRE2_ENDANCHORED) are ignored, as +is the PCRE2_NO_JIT option. The return values are also the same as for +pcre2_match(), plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial +or complete) is requested that was not compiled. +

+

+When you call pcre2_match(), as well as testing for invalid options, a +number of other sanity checks are performed on the arguments. For example, if +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path. If invalid UTF data is passed when PCRE2_MATCH_INVALID_UTF was not +set for pcre2_compile(), the result is undefined. The program may crash +or loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should call pcre2_jit_match() in UTF mode only if you are sure the +subject is valid. +

+

+Bypassing the sanity checks and the pcre2_match() wrapping can give +speedups of more than 10%. +

+
SEE ALSO
+

+pcre2api(3), pcre2unicode(3) +

+
AUTHOR
+

+Philip Hazel (FAQ by Zoltan Herczeg) +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 22 August 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2limits.html b/3rd/pcre2/doc/html/pcre2limits.html new file mode 100644 index 00000000..514c50b2 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2limits.html @@ -0,0 +1,105 @@ + + +pcre2limits specification + + +

pcre2limits man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+SIZE AND OTHER LIMITATIONS +
+

+There are some size limitations in PCRE2 but it is hoped that they will never +in practice be relevant. +

+

+The maximum size of a compiled pattern is approximately 64 thousand code units +for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default +internal linkage size, which is 2 bytes for these libraries. If you want to +process regular expressions that are truly enormous, you can compile PCRE2 with +an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is +rounded up to 4). See the README file in the source distribution and the +pcre2build +documentation for details. In these cases the limit is substantially larger. +However, the speed of execution is slower. In the 32-bit library, the internal +linkage size is always 4. +

+

+The maximum length of a source pattern string is essentially unlimited; it is +the largest number a PCRE2_SIZE variable can hold. However, the program that +calls pcre2_compile() can specify a smaller limit. +

+

+The maximum length (in code units) of a subject string is one less than the +largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned +integer type, usually defined as size_t. Its maximum value (that is +~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated strings +and unset offsets. +

+

+All values in repeating quantifiers must be less than 65536. +

+

+There are two different limits that apply to branches of lookbehind assertions. +If every branch in such an assertion matches a fixed number of characters, +the maximum length of any branch is 65535 characters. If any branch matches a +variable number of characters, then the maximum matching length for every +branch is limited. The default limit is set at compile time, defaulting to 255, +but can be changed by the calling program. +

+

+There is no limit to the number of parenthesized groups, but there can be no +more than 65535 capture groups, and there is a limit to the depth of nesting of +parenthesized subpatterns of all kinds. This is imposed in order to limit the +amount of system stack used at compile time. The default limit can be specified +when PCRE2 is built; if not, the default is set to 250. An application can +change this limit by calling pcre2_set_parens_nest_limit() to set the limit in +a compile context. +

+

+The maximum length of a name for a named capture group is 32 code units, and the +maximum number of such groups is 10000. +

+

+The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb +is 255 code units for the 8-bit library and 65535 code units for the 16-bit and +32-bit libraries. +

+

+The maximum length of a string argument to a callout is the largest number a +32-bit unsigned integer can hold. +

+

+The maximum amount of heap memory used for matching is controlled by the heap +limit, which can be set in a pattern or in a match context. The default is a +very large number, effectively unlimited. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 16 August 2023 +
+Copyright © 1997-2023 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2matching.html b/3rd/pcre2/doc/html/pcre2matching.html new file mode 100644 index 00000000..4d023250 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2matching.html @@ -0,0 +1,262 @@ + + +pcre2matching specification + + +

pcre2matching man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 MATCHING ALGORITHMS
+

+This document describes the two different algorithms that are available in +PCRE2 for matching a compiled regular expression against a given subject +string. The "standard" algorithm is the one provided by the pcre2_match() +function. This works in the same way as Perl's matching function, and provides a +Perl-compatible matching operation. The just-in-time (JIT) optimization that is +described in the +pcre2jit +documentation is compatible with this function. +

+

+An alternative algorithm is provided by the pcre2_dfa_match() function; +it operates in a different way, and is not Perl-compatible. This alternative +has advantages and disadvantages compared with the standard algorithm, and +these are described below. +

+

+When there is only one possible way in which a given subject string can match a +pattern, the two algorithms give the same answer. A difference arises, however, +when there are multiple possibilities. For example, if the anchored pattern +

+  ^<.*>
+
+is matched against the string +
+  <something> <something else> <something further>
+
+there are three possible answers. The standard algorithm finds only one of +them, whereas the alternative algorithm finds all three. +

+
REGULAR EXPRESSIONS AS TREES
+

+The set of strings that are matched by a regular expression can be represented +as a tree structure. An unlimited repetition in the pattern makes the tree of +infinite size, but it is still a tree. Matching the pattern to a given subject +string (from a given starting point) can be thought of as a search of the tree. +There are two ways to search a tree: depth-first and breadth-first, and these +correspond to the two matching algorithms provided by PCRE2. +

+
THE STANDARD MATCHING ALGORITHM
+

+In the terminology of Jeffrey Friedl's book "Mastering Regular Expressions", +the standard algorithm is an "NFA algorithm". It conducts a depth-first search +of the pattern tree. That is, it proceeds along a single path through the tree, +checking that the subject matches what is required. When there is a mismatch, +the algorithm tries any alternatives at the current point, and if they all +fail, it backs up to the previous branch point in the tree, and tries the next +alternative branch at that level. This often involves backing up (moving to the +left) in the subject string as well. The order in which repetition branches are +tried is controlled by the greedy or ungreedy nature of the quantifier. +

+

+If a leaf node is reached, a matching string has been found, and at that point +the algorithm stops. Thus, if there is more than one possible match, this +algorithm returns the first one that it finds. Whether this is the shortest, +the longest, or some intermediate length depends on the way the alternations +and the greedy or ungreedy repetition quantifiers are specified in the +pattern. +

+

+Because it ends up with a single path through the tree, it is relatively +straightforward for this algorithm to keep track of the substrings that are +matched by portions of the pattern in parentheses. This provides support for +capturing parentheses and backreferences. +

+
THE ALTERNATIVE MATCHING ALGORITHM
+

+This algorithm conducts a breadth-first search of the tree. Starting from the +first matching point in the subject, it scans the subject string from left to +right, once, character by character, and as it does this, it remembers all the +paths through the tree that represent valid matches. In Friedl's terminology, +this is a kind of "DFA algorithm", though it is not implemented as a +traditional finite state machine (it keeps multiple states active +simultaneously). +

+

+Although the general principle of this matching algorithm is that it scans the +subject string only once, without backtracking, there is one exception: when a +lookaround assertion is encountered, the characters following or preceding the +current point have to be independently inspected. +

+

+The scan continues until either the end of the subject is reached, or there are +no more unterminated paths. At this point, terminated paths represent the +different matching possibilities (if there are none, the match has failed). +Thus, if there is more than one possible match, this algorithm finds all of +them, and in particular, it finds the longest. The matches are returned in +the output vector in decreasing order of length. There is an option to stop the +algorithm after the first match (which is necessarily the shortest) is found. +

+

+Note that the size of vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of capturing parentheses in +the pattern. Using pcre2_match_data_create_from_pattern() to create the +match data block is therefore not advisable when doing DFA matching. +

+

+Note also that all the matches that are found start at the same point in the +subject. If the pattern +

+  cat(er(pillar)?)?
+
+is matched against the string "the caterpillar catchment", the result is the +three strings "caterpillar", "cater", and "cat" that start at the fifth +character of the subject. The algorithm does not automatically move on to find +matches that start at later positions. +

+

+PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\d+" is compiled as if it were "a\d++" because there is no point +even considering the possibility of backtracking into the repeated digits. For +DFA matching, this means that only one possible match is found. If you really +do want multiple matches in such cases, either use an ungreedy repeat +("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compiling. +

+

+There are a number of features of PCRE2 regular expressions that are not +supported or behave differently in the alternative matching function. Those +that are not supported cause an error if encountered. +

+

+1. Because the algorithm finds all possible matches, the greedy or ungreedy +nature of repetition quantifiers is not relevant (though it may affect +auto-possessification, as just described). During matching, greedy and ungreedy +quantifiers are treated in exactly the same way. However, possessive +quantifiers can make a difference when what follows could also match what is +quantified, for example in a pattern like this: +

+  ^a++\w!
+
+This pattern matches "aaab!" but not "aaa!", which would be matched by a +non-possessive quantifier. Similarly, if an atomic group is present, it is +matched as if it were a standalone pattern at the current point, and the +longest match is then "locked in" for the rest of the overall pattern. +

+

+2. When dealing with multiple paths through the tree simultaneously, it is not +straightforward to keep track of captured substrings for the different matching +possibilities, and PCRE2's implementation of this algorithm does not attempt to +do this. This means that no captured substrings are available. +

+

+3. Because no substrings are captured, a number of related features are not +available: +
+
+(a) Backreferences; +
+
+(b) Conditional expressions that use a backreference as the condition or test +for a specific group recursion; +
+
+(c) Script runs; +
+
+(d) Scan substring assertions. +

+

+4. Because many paths through the tree may be active, the \K escape sequence, +which resets the start of the match when encountered (but may be on some paths +and not on others), is not supported. +

+

+5. Callouts are supported, but the value of the capture_top field is +always 1, and the value of the capture_last field is always 0. +

+

+6. The \C escape sequence, which (in the standard algorithm) always matches a +single code unit, even in a UTF mode, is not supported in UTF modes because +the alternative algorithm moves through the subject string one character (not +code unit) at a time, for all active paths through the tree. +

+

+7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +supported. (*FAIL) is supported, and behaves like a failing negative assertion. +

+

+8. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not +supported by pcre2_dfa_match(). +

+
ADVANTAGES OF THE ALTERNATIVE ALGORITHM
+

+The main advantage of the alternative algorithm is that all possible matches +(at a single point in the subject) are automatically found, and in particular, +the longest match is found. To find more than one match at the same point using +the standard algorithm, you have to do kludgy things with callouts. +

+

+Partial matching is possible with this algorithm, though it has some +limitations. The +pcre2partial +documentation gives details of partial matching and discusses multi-segment +matching. +

+
DISADVANTAGES OF THE ALTERNATIVE ALGORITHM
+

+The alternative algorithm suffers from a number of disadvantages: +

+

+1. It is substantially slower than the standard algorithm. This is partly +because it has to search for all possible matches, but is also because it is +less susceptible to optimization. +

+

+2. Capturing parentheses and other features such as backreferences that rely on +them are not supported. +

+

+3. Matching within invalid UTF strings is not supported. +

+

+4. Although atomic groups are supported, their use does not provide the +performance advantage that it does for the standard algorithm. +

+

+5. JIT optimization is not supported. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 30 August 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2partial.html b/3rd/pcre2/doc/html/pcre2partial.html new file mode 100644 index 00000000..067064d9 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2partial.html @@ -0,0 +1,408 @@ + + +pcre2partial specification + + +

pcre2partial man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PARTIAL MATCHING IN PCRE2
+

+In normal use of PCRE2, if there is a match up to the end of a subject string, +but more characters are needed to match the entire pattern, PCRE2_ERROR_NOMATCH +is returned, just like any other failing match. There are circumstances where +it might be helpful to distinguish this "partial match" case. +

+

+One example is an application where the subject string is very long, and not +all available at once. The requirement here is to be able to do the matching +segment by segment, but special action is needed when a matched substring spans +the boundary between two segments. +

+

+Another example is checking a user input string as it is typed, to ensure that +it conforms to a required format. Invalid characters can be immediately +diagnosed and rejected, giving instant feedback. +

+

+Partial matching is a PCRE2-specific feature; it is not Perl-compatible. It is +requested by setting one of the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT +options when calling a matching function. The difference between the two +options is whether or not a partial match is preferred to an alternative +complete match, though the details differ between the two types of matching +function. If both options are set, PCRE2_PARTIAL_HARD takes precedence. +

+

+If you want to use partial matching with just-in-time optimized code, as well +as setting a partial match option for the matching function, you must also call +pcre2_jit_compile() with one or both of these options: +

+  PCRE2_JIT_PARTIAL_HARD
+  PCRE2_JIT_PARTIAL_SOFT
+
+PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial +matches on the same pattern. Separate code is compiled for each mode. If the +appropriate JIT mode has not been compiled, interpretive matching code is used. +

+

+Setting a partial matching option disables two of PCRE2's standard +optimization hints. PCRE2 remembers the last literal code unit in a pattern, +and abandons matching immediately if it is not present in the subject string. +This optimization cannot be used for a subject string that might match only +partially. PCRE2 also remembers a minimum length of a matching string, and does +not bother to run the matching function on shorter strings. This optimization +is also disabled for partial matching. +

+
REQUIREMENTS FOR A PARTIAL MATCH
+

+A possible partial match occurs during matching when the end of the subject +string is reached successfully, but either more characters are needed to +complete the match, or the addition of more characters might change what is +matched. +

+

+Example 1: if the pattern is /abc/ and the subject is "ab", more characters are +definitely needed to complete a match. In this case both hard and soft matching +options yield a partial match. +

+

+Example 2: if the pattern is /ab+/ and the subject is "ab", a complete match +can be found, but the addition of more characters might change what is +matched. In this case, only PCRE2_PARTIAL_HARD returns a partial match; +PCRE2_PARTIAL_SOFT returns the complete match. +

+

+On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if the next +pattern item is \z, \Z, \b, \B, or $ there is always a partial match. +Otherwise, for both options, the next pattern item must be one that inspects a +character, and at least one of the following must be true: +

+

+(1) At least one character has already been inspected. An inspected character +need not form part of the final matched string; lookbehind assertions and the +\K escape sequence provide ways of inspecting characters before the start of a +matched string. +

+

+(2) The pattern contains one or more lookbehind assertions. This condition +exists in case there is a lookbehind that inspects characters before the start +of the match. +

+

+(3) There is a special case when the whole pattern can match an empty string. +When the starting point is at the end of the subject, the empty string match is +a possibility, and if PCRE2_PARTIAL_SOFT is set and neither of the above +conditions is true, it is returned. However, because adding more characters +might result in a non-empty match, PCRE2_PARTIAL_HARD returns a partial match, +which in this case means "there is going to be a match at this point, but until +some more characters are added, we do not know if it will be an empty string or +something longer". +

+
PARTIAL MATCHING USING pcre2_match()
+

+When a partial matching option is set, the result of calling +pcre2_match() can be one of the following: +

+

+A successful match +A complete match has been found, starting and ending within this subject. +

+

+PCRE2_ERROR_NOMATCH +No match can start anywhere in this subject. +

+

+PCRE2_ERROR_PARTIAL +Adding more characters may result in a complete match that uses one or more +characters from the end of this subject. +

+

+When a partial match is returned, the first two elements in the ovector point +to the portion of the subject that was matched, but the values in the rest of +the ovector are undefined. The appearance of \K in the pattern has no effect +for a partial match. Consider this pattern: +

+  /abc\K123/
+
+If it is matched against "456abc123xyz" the result is a complete match, and the +ovector defines the matched string as "123", because \K resets the "start of +match" point. However, if a partial match is requested and the subject string +is "456abc12", a partial match is found for the string "abc12", because all +these characters are needed for a subsequent re-match with additional +characters. +

+

+If there is more than one partial match, the first one that was found provides +the data that is returned. Consider this pattern: +

+  /123\w+X|dogY/
+
+If this is matched against the subject string "abc123dog", both alternatives +fail to match, but the end of the subject is reached during matching, so +PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9, identifying +"123dog" as the first partial match. (In this example, there are two partial +matches, because "dog" on its own partially matches the second alternative.) +

+
+How a partial match is processed by pcre2_match() +
+

+What happens when a partial match is identified depends on which of the two +partial matching options is set. +

+

+If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon as a +partial match is found, without continuing to search for possible complete +matches. This option is "hard" because it prefers an earlier partial match over +a later complete match. For this reason, the assumption is made that the end of +the supplied subject string is not the true end of the available data, which is +why \z, \Z, \b, \B, and $ always give a partial match. +

+

+If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but matching +continues as normal, and other alternatives in the pattern are tried. If no +complete match can be found, PCRE2_ERROR_PARTIAL is returned instead of +PCRE2_ERROR_NOMATCH. This option is "soft" because it prefers a complete match +over a partial match. All the various matching items in a pattern behave as if +the subject string is potentially complete; \z, \Z, and $ match at the end of +the subject, as normal, and for \b and \B the end of the subject is treated +as a non-alphanumeric. +

+

+The difference between the two partial matching options can be illustrated by a +pattern such as: +

+  /dog(sbody)?/
+
+This matches either "dog" or "dogsbody", greedily (that is, it prefers the +longer string if possible). If it is matched against the string "dog" with +PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". However, if +PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PARTIAL. On the other +hand, if the pattern is made ungreedy the result is different: +
+  /dog(sbody)??/
+
+In this case the result is always a complete match because that is found first, +and matching never continues after finding a complete match. It might be easier +to follow this explanation by thinking of the two patterns like this: +
+  /dog(sbody)?/    is the same as  /dogsbody|dog/
+  /dog(sbody)??/   is the same as  /dog|dogsbody/
+
+The second pattern will never match "dogsbody", because it will always find the +shorter match first. +

+
+Example of partial matching using pcre2test +
+

+The pcre2test data modifiers partial_hard (or ph) and +partial_soft (or ps) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT, +respectively, when calling pcre2_match(). Here is a run of +pcre2test using a pattern that matches the whole subject in the form of a +date: +

+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 25dec3\=ph
+  Partial match: 25dec3
+  data> 3ju\=ph
+  Partial match: 3ju
+  data> 3juj\=ph
+  No match
+
+This example gives the same results for both hard and soft partial matching +options. Here is an example where there is a difference: +
+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 25jun04\=ps
+   0: 25jun04
+   1: jun
+  data> 25jun04\=ph
+  Partial match: 25jun04
+
+With PCRE2_PARTIAL_SOFT, the subject is matched completely. For +PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, so +there is only a partial match. +

+
MULTI-SEGMENT MATCHING WITH pcre2_match()
+

+PCRE was not originally designed with multi-segment matching in mind. However, +over time, features (including partial matching) that make multi-segment +matching possible have been added. A very long string can be searched segment +by segment by calling pcre2_match() repeatedly, with the aim of achieving +the same results that would happen if the entire string was available for +searching all the time. Normally, the strings that are being sought are much +shorter than each individual segment, and are in the middle of very long +strings, so the pattern is normally not anchored. +

+

+Special logic must be implemented to handle a matched substring that spans a +segment boundary. PCRE2_PARTIAL_HARD should be used, because it returns a +partial match at the end of a segment whenever there is the possibility of +changing the match by adding more characters. The PCRE2_NOTBOL option should +also be set for all but the first segment. +

+

+When a partial match occurs, the next segment must be added to the current +subject and the match re-run, using the startoffset argument of +pcre2_match() to begin at the point where the partial match started. +For example: +

+    re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
+  data> ...the date is 23ja\=ph
+  Partial match: 23ja
+  data> ...the date is 23jan19 and on that day...\=offset=15
+   0: 23jan19
+   1: jan
+
+Note the use of the offset modifier to start the new match where the +partial match was found. In this example, the next segment was added to the one +in which the partial match was found. This is the most straightforward +approach, typically using a memory buffer that is twice the size of each +segment. After a partial match, the first half of the buffer is discarded, the +second half is moved to the start of the buffer, and a new segment is added +before repeating the match as in the example above. After a no match, the +entire buffer can be discarded. +

+

+If there are memory constraints, you may want to discard text that precedes a +partial match before adding the next segment. Unfortunately, this is not at +present straightforward. In cases such as the above, where the pattern does not +contain any lookbehinds, it is sufficient to retain only the partially matched +substring. However, if the pattern contains a lookbehind assertion, characters +that precede the start of the partial match may have been inspected during the +matching process. When pcre2test displays a partial match, it indicates +these characters with '<' if the allusedtext modifier is set: +

+    re> "(?<=123)abc"
+  data> xx123ab\=ph,allusedtext
+  Partial match: 123ab
+                 <<<
+
+However, the allusedtext modifier is not available for JIT matching, +because JIT matching does not record the first (or last) consulted characters. +For this reason, this information is not available via the API. It is therefore +not possible in general to obtain the exact number of characters that must be +retained in order to get the right match result. If you cannot retain the +entire segment, you must find some heuristic way of choosing. +

+

+If you know the approximate length of the matching substrings, you can use that +to decide how much text to retain. The only lookbehind information that is +currently available via the API is the length of the longest individual +lookbehind in a pattern, but this can be misleading if there are nested +lookbehinds. The value returned by calling pcre2_pattern_info() with the +PCRE2_INFO_MAXLOOKBEHIND option is the maximum number of characters (not code +units) that any individual lookbehind moves back when it is processed. A +pattern such as "(?<=(?<!b)a)" has a maximum lookbehind value of one, but +inspects two characters before its starting point. +

+

+In a non-UTF or a 32-bit case, moving back is just a subtraction, but in +UTF-8 or UTF-16 you have to count characters while moving back through the code +units. +

+
PARTIAL MATCHING USING pcre2_dfa_match()
+

+The DFA function moves along the subject string character by character, without +backtracking, searching for all possible matches simultaneously. If the end of +the subject is reached before the end of the pattern, there is the possibility +of a partial match. +

+

+When PCRE2_PARTIAL_SOFT is set, PCRE2_ERROR_PARTIAL is returned only if there +have been no complete matches. Otherwise, the complete matches are returned. +If PCRE2_PARTIAL_HARD is set, a partial match takes precedence over any +complete matches. The portion of the string that was matched when the longest +partial match was found is set as the first matching string. +

+

+Because the DFA function always searches for all possible matches, and there is +no difference between greedy and ungreedy repetition, its behaviour is +different from that of pcre2_match(). Consider the string "dog" matched +against this ungreedy pattern: +

+  /dog(sbody)??/
+
+Whereas the standard function stops as soon as it finds the complete match for +"dog", the DFA function also finds the partial match for "dogsbody", and so +returns that when PCRE2_PARTIAL_HARD is set. +

+
MULTI-SEGMENT MATCHING WITH pcre2_dfa_match()
+

+When a partial match has been found using the DFA matching function, it is +possible to continue the match by providing additional subject data and calling +the function again with the same compiled regular expression, this time setting +the PCRE2_DFA_RESTART option. You must pass the same working space as before, +because this is where details of the previous partial match are stored. You can +set the PCRE2_PARTIAL_SOFT or PCRE2_PARTIAL_HARD options with PCRE2_DFA_RESTART +to continue partial matching over multiple segments. Here is an example using +pcre2test: +

+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 23ja\=dfa,ps
+  Partial match: 23ja
+  data> n05\=dfa,dfa_restart
+   0: n05
+
+The first call has "23ja" as the subject, and requests partial matching; the +second call has "n05" as the subject for the continued (restarted) match. +Notice that when the match is complete, only the last part is shown; PCRE2 does +not retain the previously partially-matched string. It is up to the calling +program to do that if it needs to. This means that, for an unanchored pattern, +if a continued match fails, it is not possible to try again at a new starting +point. All this facility is capable of doing is continuing with the previous +match attempt. For example, consider this pattern: +
+  1234|3789
+
+If the first part of the subject is "ABC123", a partial match of the first +alternative is found at offset 3. There is no partial match for the second +alternative, because such a match does not start at the same point in the +subject string. Attempting to continue with the string "7890" does not yield a +match because only those alternatives that match at one point in the subject +are remembered. Depending on the application, this may or may not be what you +want. +

+

+If you do want to allow for starting again at the next character, one way of +doing it is to retain some or all of the segment and try a new complete match, +as described for pcre2_match() above. Another possibility is to work with +two buffers. If a partial match at offset n in the first buffer is +followed by "no match" when PCRE2_DFA_RESTART is used on the second buffer, you +can then try a new match starting at offset n+1 in the first buffer. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 27 November 2024 +
+Copyright © 1997-2019 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2pattern.html b/3rd/pcre2/doc/html/pcre2pattern.html new file mode 100644 index 00000000..84eb0aa1 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2pattern.html @@ -0,0 +1,4140 @@ + + +pcre2pattern specification + + +

pcre2pattern man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 REGULAR EXPRESSION DETAILS
+

+The syntax and semantics of the regular expressions that are supported by PCRE2 +are described in detail below. There is a quick-reference syntax summary in the +pcre2syntax +page. PCRE2 tries to match Perl syntax and semantics as closely as it can. +PCRE2 also supports some alternative regular expression syntax that does not +conflict with the Perl syntax in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. There are in addition some +options that enable alternative syntax and semantics that are not the same as +in Perl. +

+

+Perl's regular expressions are described in its own documentation, and regular +expressions in general are covered in a number of books, some of which have +copious examples. Jeffrey Friedl's "Mastering Regular Expressions", published +by O'Reilly, covers regular expressions in great detail. This description of +PCRE2's regular expressions is intended as reference material. +

+

+This document discusses the regular expression patterns that are supported by +PCRE2 when its main matching function, pcre2_match(), is used. PCRE2 also +has an alternative matching function, pcre2_dfa_match(), which matches +using a different algorithm that is not Perl-compatible. Some of the features +discussed below are not available when DFA matching is used. The advantages and +disadvantages of the alternative function, and how it differs from the normal +function, are discussed in the +pcre2matching +page. +

+
EBCDIC CHARACTER CODES
+

+Most computers use ASCII or Unicode for encoding characters, and PCRE2 assumes +this by default. However, it can be compiled to run in an environment that uses +the EBCDIC code, which is the case for some IBM mainframe operating systems. In +the sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. Differences in behaviour when PCRE2 is running in +an EBCDIC environment are described in the section +"EBCDIC environments" +below, which you can ignore unless you really are in an EBCDIC environment. +

+
SPECIAL START-OF-PATTERN ITEMS
+

+A number of options that can be passed to pcre2_compile() can also be set +by special items at the start of a pattern. These are not Perl-compatible, but +are provided to make these options accessible to pattern writers who are not +able to change the program that processes the pattern. Any number of these +items may appear, but they must all be together right at the start of the +pattern string, and the letters must be in upper case. +

+
+UTF support +
+

+In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either as +single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 can be +specified for the 32-bit library, in which case it constrains the character +values to valid Unicode code points. To process UTF strings, PCRE2 must be +built to include Unicode support (which is the default). When using UTF strings +you must either call the compiling function with one or both of the PCRE2_UTF +or PCRE2_MATCH_INVALID_UTF options, or the pattern must start with the special +sequence (*UTF), which is equivalent to setting the PCRE2_UTF option. How +setting a UTF mode affects pattern matching is mentioned in several places +below. There is also a summary of features in the +pcre2unicode +page. +

+

+Some applications that allow their users to supply patterns may wish to +restrict them to non-UTF data for security reasons. If the PCRE2_NEVER_UTF +option is passed to pcre2_compile(), (*UTF) is not allowed, and its +appearance in a pattern causes an error. +

+
+Unicode property support +
+

+Another special sequence that may appear at the start of a pattern is (*UCP). +This has the same effect as setting the PCRE2_UCP option: it causes sequences +such as \d and \w to use Unicode properties to determine character types, +instead of recognizing only characters with codes less than 256 via a lookup +table. It also causes upper/lower casing operations to use Unicode properties +for characters with code points greater than 127, even when UTF is not set. +These behaviours can be changed within the pattern; see the section entitled +"Internal Option Setting" +below. +

+

+Some applications that allow their users to supply patterns may wish to +restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to +pcre2_compile(), (*UCP) is not allowed, and its appearance in a pattern +causes an error. +

+
+Locking out empty string matching +
+

+Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same effect +as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option to whichever +matching function is subsequently called to match the pattern. These options +lock out the matching of empty strings, either entirely, or only at the start +of the subject. +

+
+Disabling auto-possessification +
+

+If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting +the PCRE2_NO_AUTO_POSSESS option, or calling pcre2_set_optimize() with +a PCRE2_AUTO_POSSESS_OFF directive. This stops PCRE2 from making quantifiers +possessive when what follows cannot match the repeated item. For example, by +default a+b is treated as a++b. For more details, see the +pcre2api +documentation. +

+
+Disabling start-up optimizations +
+

+If a pattern starts with (*NO_START_OPT), it has the same effect as setting the +PCRE2_NO_START_OPTIMIZE option, or calling pcre2_set_optimize() with +a PCRE2_START_OPTIMIZE_OFF directive. This disables several optimizations for +quickly reaching "no match" results. For more details, see the +pcre2api +documentation. +

+
+Disabling automatic anchoring +
+

+If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect as +setting the PCRE2_NO_DOTSTAR_ANCHOR option, or calling pcre2_set_optimize() +with a PCRE2_DOTSTAR_ANCHOR_OFF directive. This disables optimizations that +apply to patterns whose top-level branches all start with .* (match any number +of arbitrary characters). For more details, see the +pcre2api +documentation. +

+
+Disabling JIT compilation +
+

+If a pattern that starts with (*NO_JIT) is successfully compiled, an attempt by +the application to apply the JIT optimization by calling +pcre2_jit_compile() is ignored. +

+
+Setting match resource limits +
+

+The pcre2_match() function contains a counter that is incremented every +time it goes round its main loop. The caller of pcre2_match() can set a +limit on this counter, which therefore limits the amount of computing resource +used for a match. The maximum depth of nested backtracking can also be limited; +this indirectly restricts the amount of heap memory that is used, but there is +also an explicit memory limit that can be set. +

+

+These facilities are provided to catch runaway matches that are provoked by +patterns with huge matching trees. A common example is a pattern with nested +unlimited repeats applied to a long string that does not match. When one of +these limits is reached, pcre2_match() gives an error return. The limits +can also be set by items at the start of the pattern of the form +

+  (*LIMIT_HEAP=d)
+  (*LIMIT_MATCH=d)
+  (*LIMIT_DEPTH=d)
+
+where d is any number of decimal digits. However, the value of the setting must +be less than the value set (or defaulted) by the caller of pcre2_match() +for it to have any effect. In other words, the pattern writer can lower the +limits set by the programmer, but not raise them. If there is more than one +setting of one of these limits, the lower value is used. The heap limit is +specified in kibibytes (units of 1024 bytes). +

+

+Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is +still recognized for backwards compatibility. +

+

+The heap limit applies only when the pcre2_match() or +pcre2_dfa_match() interpreters are used for matching. It does not apply +to JIT. The match limit is used (but in a different way) when JIT is being +used, or when pcre2_dfa_match() is called, to limit computing resource +usage by those matching functions. The depth limit is ignored by JIT but is +relevant for DFA matching, which uses function recursion for recursions within +the pattern and for lookaround assertions and atomic groups. In this case, the +depth limit controls the depth of such recursion. +

+
+Newline conventions +
+

+PCRE2 supports six different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, any +Unicode newline sequence, or the NUL character (binary zero). The +pcre2api +page has +further discussion +about newlines, and shows how to set the newline convention when calling +pcre2_compile(). +

+

+It is also possible to specify a newline convention by starting a pattern +string with one of the following sequences: +

+  (*CR)        carriage return
+  (*LF)        linefeed
+  (*CRLF)      carriage return, followed by linefeed
+  (*ANYCRLF)   any of the three above
+  (*ANY)       all Unicode newline sequences
+  (*NUL)       the NUL character (binary zero)
+
+These override the default and the options given to the compiling function. For +example, on a Unix system where LF is the default newline sequence, the pattern +
+  (*CR)a.b
+
+changes the convention to CR. That pattern matches "a\nb" because LF is no +longer a newline. If more than one of these settings is present, the last one +is used. +

+

+The newline convention affects where the circumflex and dollar assertions are +true. It also affects the interpretation of the dot metacharacter when +PCRE2_DOTALL is not set, and the behaviour of \N when not followed by an +opening brace. However, it does not affect what the \R escape sequence +matches. By default, this is any Unicode newline sequence, for Perl +compatibility. However, this can be changed; see the next section and the +description of \R in the section entitled +"Newline sequences" +below. A change of \R setting can be combined with a change of newline +convention. +

+
+Specifying what \R matches +
+

+It is possible to restrict \R to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. This effect can also be achieved by starting a pattern with +(*BSR_ANYCRLF). For completeness, (*BSR_UNICODE) is also recognized, +corresponding to PCRE2_BSR_UNICODE. +

+
CHARACTERS AND METACHARACTERS
+

+A regular expression is a pattern that is matched against a subject string from +left to right. Most characters stand for themselves in a pattern, and match the +corresponding characters in the subject. As a trivial example, the pattern +

+  The quick brown fox
+
+matches a portion of a subject string that is identical to itself. When +caseless matching is specified (the PCRE2_CASELESS option or (?i) within the +pattern), letters are matched independently of case. Note that there are two +ASCII characters, K and S, that, in addition to their lower case ASCII +equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F +(long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the +PCRE2_EXTRA_CASELESS_RESTRICT option is in force (either passed to +pcre2_compile() or set by (*CASELESS_RESTRICT) or (?r) within the +pattern). If the PCRE2_EXTRA_TURKISH_CASING option is in force (either passed +to pcre2_compile() or set by (*TURKISH_CASING) within the pattern), then +the 'i' letters are matched according to the casing rules of the Turkish and Azeri languages. +

+

+The power of regular expressions comes from the ability to include wild cards, +character classes, alternatives, and repetitions in the pattern. These are +encoded in the pattern by the use of metacharacters, which do not stand +for themselves but instead are interpreted in some special way. +

+

+There are two different sets of metacharacters: those that are recognized +anywhere in the pattern except within square brackets, and those that are +recognized within square brackets. Outside square brackets, the metacharacters +are as follows: +

+  \      general escape character with several uses
+  ^      assert start of string (or line, in multiline mode)
+  $      assert end of string (or line, in multiline mode)
+  .      match any character except newline (by default)
+  [      start character class definition
+  |      start of alternative branch
+  (      start group or control verb
+  )      end group or control verb
+  *      0 or more quantifier
+  +      1 or more quantifier; also "possessive quantifier"
+  ?      0 or 1 quantifier; also quantifier minimizer
+  {      potential start of min/max quantifier
+
+Brace characters { and } are also used to enclose data for constructions such +as \g{2} or \k{name}. In almost all uses of braces, space and/or horizontal +tab characters that follow { or precede } are allowed and are ignored. In the +case of quantifiers, they may also appear before or after the comma. The +exception to this is \u{...} which is an ECMAScript compatibility feature +that is recognized only when the PCRE2_EXTRA_ALT_BSUX option is set. ECMAScript +does not ignore such white space; it causes the item to be interpreted as +literal. +

+

+Part of a pattern that is in square brackets is called a "character class". In +a character class the only metacharacters are: +

+  \      general escape character
+  ^      negate the class, but only if it is the first character
+  -      indicates character range
+  [      POSIX character class (if followed by POSIX syntax)
+  ]      terminates the character class
+
+If a pattern is compiled with the PCRE2_EXTENDED option, most white space in +the pattern, other than in a character class, within a \Q...\E sequence, or +between a # outside a character class and the next newline, inclusive, is +ignored. An escaping backslash can be used to include a white space or a # +character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the +same applies, but in addition unescaped space and horizontal tab characters are +ignored inside a character class. Note: only these two characters are ignored, +not the full set of pattern white space characters that are ignored outside a +character class. Option settings can be changed within a pattern; see the +section entitled +"Internal Option Setting" +below. +

+

+The following sections describe the use of each of the metacharacters. +

+
BACKSLASH
+

+The backslash character has several uses. Firstly, if it is followed by a +character that is not a digit or a letter, it takes away any special meaning +that character may have. This use of backslash as an escape character applies +both inside and outside character classes. +

+

+For example, if you want to match a * character, you must write \* in the +pattern. This escaping action applies whether or not the following character +would otherwise be interpreted as a metacharacter, so it is always safe to +precede a non-alphanumeric with backslash to specify that it stands for itself. +In particular, if you want to match a backslash, you write \\. +

+

+Only ASCII digits and letters have any special meaning after a backslash. All +other characters (in particular, those whose code points are greater than 127) +are treated as literals. +

+

+If you want to treat all characters in a sequence as literals, you can do so by +putting them between \Q and \E. Note that this includes white space even when +the PCRE2_EXTENDED option is set so that most other white space is ignored. The +behaviour is different from Perl in that $ and @ are handled as literals in +\Q...\E sequences in PCRE2, whereas in Perl, $ and @ cause variable +interpolation. Also, Perl does "double-quotish backslash interpolation" on any +backslashes between \Q and \E which, its documentation says, "may lead to +confusing results". PCRE2 treats a backslash between \Q and \E just like any +other character. Note the following examples: +

+  Pattern            PCRE2 matches   Perl matches
+
+  \Qabc$xyz\E        abc$xyz        abc followed by the contents of $xyz
+  \Qabc\$xyz\E       abc\$xyz       abc\$xyz
+  \Qabc\E\$\Qxyz\E   abc$xyz        abc$xyz
+  \QA\B\E            A\B            A\B
+  \Q\\E              \              \\E
+
+The \Q...\E sequence is recognized both inside and outside character classes. +An isolated \E that is not preceded by \Q is ignored. If \Q is not followed +by \E later in the pattern, the literal interpretation continues to the end of +the pattern (that is, \E is assumed at the end). If the isolated \Q is inside +a character class, this causes an error, because the character class is then +not terminated by a closing square bracket. +

+

+Another difference from Perl is that any appearance of \Q or \E inside what +might otherwise be a quantifier causes PCRE2 not to recognize the sequence as a +quantifier. Perl recognizes a quantifier if (redundantly) either of the numbers +is inside \Q...\E, but not if the separating comma is. When not recognized as +a quantifier a sequence such as {\Q1\E,2} is treated as the literal string +"{1,2}". +

+
+Non-printing characters +
+

+A second use of backslash provides a way of encoding non-printing characters +in patterns in a visible manner. There is no restriction on the appearance of +non-printing characters in a pattern, but when a pattern is being prepared by +text editing, it is often easier to use one of the following escape sequences +instead of the binary character it represents. In an ASCII or Unicode +environment, these escapes are as follows: +

+  \a          alarm, that is, the BEL character (hex 07)
+  \cx         "control-x", where x is a non-control ASCII character
+  \e          escape (hex 1B)
+  \f          form feed (hex 0C)
+  \n          linefeed (hex 0A)
+  \r          carriage return (hex 0D) (but see below)
+  \t          tab (hex 09)
+  \0dd        character with octal code 0dd
+  \ddd        character with octal code ddd, or back reference
+  \o{ddd..}   character with octal code ddd..
+  \xhh        character with hex code hh
+  \x{hhh..}   character with hex code hhh..
+  \N{U+hhh..} character with Unicode hex code point hhh..
+
+A description of how back references work is given +later, +following the discussion of +parenthesized groups. +

+

+By default, after \x that is not followed by {, one or two hexadecimal +digits are read (letters can be in upper or lower case). If the character that +follows \x is neither { nor a hexadecimal digit, an error occurs. This is +different from Perl's default behaviour, which generates a NUL character, but +is in line with the behaviour of Perl's 'strict' mode in re. +

+

+Any number of hexadecimal digits may appear between \x{ and }. If a character +other than a hexadecimal digit appears between \x{ and }, or if there is no +terminating }, an error occurs. +

+

+Characters whose code points are less than 256 can be defined by either of the +two syntaxes for \x or by an octal sequence. There is no difference in the way +they are handled. For example, \xdc is exactly the same as \x{dc} or \334. +However, using the braced versions does make such sequences easier to read. +

+

+Support is available for some ECMAScript (aka JavaScript) escape sequences via +two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \x followed +by { is not recognized. Only if \x is followed by two hexadecimal digits is it +recognized as a character escape. Otherwise it is interpreted as a literal "x" +character. In this mode, support for code points greater than 255 is provided +by \u, which must be followed by four hexadecimal digits; otherwise it is +interpreted as a literal "u" character. +

+

+PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition, +\u{hhh..} is recognized as the character specified by hexadecimal code point. +There may be any number of hexadecimal digits, but unlike other places that +also use curly brackets, spaces are not allowed and would result in the string +being interpreted as a literal. This syntax is from ECMAScript 6. +

+

+The \N{U+hhh..} escape sequence is recognized only when PCRE2 is operating in +UTF mode. Perl also uses \N{name} to specify characters by Unicode name; PCRE2 +does not support this. Note that when \N is not followed by an opening brace +(curly bracket) it has an entirely different meaning, matching any character +that is not a newline. +

+

+There are some legacy applications where the escape sequence \r is expected to +match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option is set, \r in a +pattern is converted to \n so that it matches a LF (linefeed) instead of a CR +(carriage return) character. +

+

+An error occurs if \c is not followed by a character whose ASCII code point +is in the range 32 to 126. The precise effect of \cx is as follows: if x is a +lower case letter, it is converted to upper case. Then bit 6 of the character +(hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is +5A), but \c{ becomes hex 3B ({ is 7B), and \c; becomes hex 7B (; is 3B). If +the code unit following \c has a code point less than 32 or greater than 126, +a compile-time error occurs. +

+

+For differences in the way some escapes behave in EBCDIC environments, +see section +"EBCDIC environments" +below. +

+
+Octal escapes and back references +
+

+The escape \o must be followed by a sequence of octal digits, enclosed in +braces. An error occurs if this is not the case. This escape provides a way of +specifying character code points as octal numbers greater than 0777, and it +also allows octal numbers and backreferences to be unambiguously distinguished. +

+

+If braces are not used, after \0 up to two further octal digits are read. +However, if the PCRE2_EXTRA_NO_BS0 option is set, at least one more octal digit +must follow \0 (use \00 to generate a NUL character). Make sure you supply +two digits after the initial zero if the pattern character that follows is +itself an octal digit. +

+

+Inside a character class, when a backslash is followed by any octal digit, up +to three octal digits are read to generate a code point. Any subsequent digits +stand for themselves. The sequences \8 and \9 are treated as the literal +characters "8" and "9". +

+

+Outside a character class, Perl's handling of a backslash followed by a digit +other than 0 is complicated by ambiguity, and Perl has changed over time, +causing PCRE2 also to change. From PCRE2 release 10.45 there is an option +called PCRE2_EXTRA_PYTHON_OCTAL that causes PCRE2 to use Python's unambiguous +rules. The next two subsections describe the two sets of rules. +

+

+For greater clarity and unambiguity, it is best to avoid following \ by a +digit greater than zero. Instead, use \o{...} or \x{...} to specify numerical +character code points, and \g{...} to specify backreferences. +

+
+Perl rules for non-class backslash 1-9 +
+

+All the digits that follow the backslash are read as a decimal number. If the +number is less than 10, begins with the digit 8 or 9, or if there are at least +that many previous capture groups in the expression, the entire sequence is +taken as a back reference. Otherwise, up to three octal digits are read to form +a character code. For example: +

+  \040   is another way of writing an ASCII space
+  \40    is the same, provided there are fewer than 40 previous capture groups
+  \7     is always a backreference
+  \11    might be a backreference, or another way of writing a tab
+  \011   is always a tab
+  \0113  is a tab followed by the character "3"
+  \113   might be a backreference, otherwise the character with octal code 113
+  \377   might be a backreference, otherwise the value 255 (decimal)
+  \81    is always a backreference
+
+Note that octal values of 100 or greater that are specified using this syntax +must not be introduced by a leading zero, because no more than three octal +digits are ever read. +

+
+Python rules for non-class backslash 1-9 +
+

+If there are at least three octal digits after the backslash, exactly three are +read as an octal code point number, but the value must be no greater than +\377, even in modes where higher code point values are supported. Any +subsequent digits stand for themselves. If there are fewer than three octal +digits, the sequence is taken as a decimal back reference. Thus, for example, +\12 is always a back reference, independent of how many captures there are in +the pattern. An error is generated for a reference to a non-existent capturing +group. +

+
+Constraints on character values +
+

+Characters that are specified using octal or hexadecimal numbers are +limited to certain values, as follows: +

+  8-bit non-UTF mode    no greater than 0xff
+  16-bit non-UTF mode   no greater than 0xffff
+  32-bit non-UTF mode   no greater than 0xffffffff
+  All UTF modes         no greater than 0x10ffff and a valid code point
+
+Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the +so-called "surrogate" code points). The check for these can be disabled by the +caller of pcre2_compile() by setting the option +PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8 +and UTF-32 modes, because these values are not representable in UTF-16. +

+
+Escape sequences in character classes +
+

+All the sequences that define a single character value can be used both inside +and outside character classes. In addition, inside a character class, \b is +interpreted as the backspace character (hex 08). +

+

+When not followed by an opening brace, \N is not allowed in a character class. +\B, \R, and \X are not special inside a character class. Like other +unrecognized alphabetic escape sequences, they cause an error. Outside a +character class, these sequences have different meanings. +

+
+Unsupported escape sequences +
+

+In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its string +handler and used to modify the case of following characters. By default, PCRE2 +does not support these escape sequences in patterns. However, if either of the +PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U matches a "U" +character, and \u can be used to define a character by code point, as +described above. +

+
+Absolute and relative backreferences +
+

+The sequence \g followed by a signed or unsigned number, optionally enclosed +in braces, is an absolute or relative backreference. A named backreference +can be coded as \g{name}. Backreferences are discussed +later, +following the discussion of +parenthesized groups. +

+
+Absolute and relative subroutine calls +
+

+For compatibility with Oniguruma, the non-Perl syntax \g, followed by a name or +a number enclosed in either angle brackets or single quotes, is an alternative +syntax for referencing a capture group as a subroutine. Details are discussed +later. +Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not +synonymous. The former is a backreference; the latter is a +subroutine +call. +

+
+Generic character types +
+

+Another use of backslash is for specifying generic character types: +

+  \d     any decimal digit
+  \D     any character that is not a decimal digit
+  \h     any horizontal white space character
+  \H     any character that is not a horizontal white space character
+  \N     any character that is not a newline
+  \s     any white space character
+  \S     any character that is not a white space character
+  \v     any vertical white space character
+  \V     any character that is not a vertical white space character
+  \w     any "word" character
+  \W     any "non-word" character
+
+The \N escape sequence has the same meaning as +the "." metacharacter +when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change the +meaning of \N. Note that when \N is followed by an opening brace it has a +different meaning. See the section entitled +"Non-printing characters" +above for details. Perl also uses \N{name} to specify characters by Unicode +name; PCRE2 does not support this. +

+

+Each pair of lower and upper case escape sequences partitions the complete set +of characters into two disjoint sets. Any given character matches one, and only +one, of each pair. The sequences can appear both inside and outside character +classes. They each match one character of the appropriate type. If the current +matching point is at the end of the subject string, all of them fail, because +there is no character to match. +

+

+The default \s characters are HT (9), LF (10), VT (11), FF (12), CR (13), and +space (32), which are defined as white space in the "C" locale. This list may +vary if locale-specific matching is taking place. For example, in some locales +the "non-breaking space" character (\xA0) is recognized as white space, and in +others the VT character is not. +

+

+A "word" character is an underscore or any character that is a letter or digit. +By default, the definition of letters and digits is controlled by PCRE2's +low-valued character tables, and may vary if locale-specific matching is taking +place (see +"Locale support" +in the +pcre2api +page). For example, in a French locale such as "fr_FR" in Unix-like systems, +or "french" in Windows, some character codes greater than 127 are used for +accented letters, and these are then matched by \w. The use of locales with +Unicode is discouraged. +

+

+By default, characters whose code points are greater than 127 never match \d, +\s, or \w, and always match \D, \S, and \W, although this may be different +for characters in the range 128-255 when locale-specific matching is happening. +These escape sequences retain their original meanings from before Unicode +support was available, mainly for efficiency reasons. If the PCRE2_UCP option +is set, the behaviour is changed so that Unicode properties are used to +determine character types, as follows: +

+  \d  any character that matches \p{Nd} (decimal digit)
+  \s  any character that matches \p{Z} or \h or \v
+  \w  any character that matches \p{L}, \p{N}, \p{Mn}, or \p{Pc}
+
+The addition of \p{Mn} (non-spacing mark) and the replacement of an explicit +test for underscore with a test for \p{Pc} (connector punctuation) happened in +PCRE2 release 10.43. This brings PCRE2 into line with Perl. +

+

+The upper case escapes match the inverse sets of characters. Note that \d +matches only decimal digits, whereas \w matches any Unicode digit, as well as +other character categories. Note also that PCRE2_UCP affects \b and +\B because they are defined in terms of \w and \W. Matching these sequences +is noticeably slower when PCRE2_UCP is set. +

+

+The effect of PCRE2_UCP on any one of these escape sequences can be negated by +the options PCRE2_EXTRA_ASCII_BSD, PCRE2_EXTRA_ASCII_BSS, and +PCRE2_EXTRA_ASCII_BSW, respectively. These options can be set and reset within +a pattern by means of an internal option setting +(see below). +

+

+The sequences \h, \H, \v, and \V, in contrast to the other sequences, which +match only ASCII characters by default, always match a specific list of code +points, whether or not PCRE2_UCP is set. The horizontal space characters are: +

+  U+0009     Horizontal tab (HT)
+  U+0020     Space
+  U+00A0     Non-break space
+  U+1680     Ogham space mark
+  U+180E     Mongolian vowel separator
+  U+2000     En quad
+  U+2001     Em quad
+  U+2002     En space
+  U+2003     Em space
+  U+2004     Three-per-em space
+  U+2005     Four-per-em space
+  U+2006     Six-per-em space
+  U+2007     Figure space
+  U+2008     Punctuation space
+  U+2009     Thin space
+  U+200A     Hair space
+  U+202F     Narrow no-break space
+  U+205F     Medium mathematical space
+  U+3000     Ideographic space
+
+The vertical space characters are: +
+  U+000A     Linefeed (LF)
+  U+000B     Vertical tab (VT)
+  U+000C     Form feed (FF)
+  U+000D     Carriage return (CR)
+  U+0085     Next line (NEL)
+  U+2028     Line separator
+  U+2029     Paragraph separator
+
+In 8-bit, non-UTF-8 mode, only the characters with code points less than 256 +are relevant. +

+
+Newline sequences +
+

+Outside a character class, by default, the escape sequence \R matches any +Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent to the +following: +

+  (?>\r\n|\n|\x0b|\f|\r|\x85)
+
+This is an example of an "atomic group", details of which are given +below. +This particular group matches either the two-character sequence CR followed by +LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, +U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next +line, U+0085). Because this is an atomic group, the two-character sequence is +treated as a single unit that cannot be split. +

+

+In other modes, two additional characters whose code points are greater than 255 +are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029). +Unicode support is not needed for these characters to be recognized. +

+

+It is possible to restrict \R to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. (BSR is an abbreviation for "backslash R".) This can be made +the default when PCRE2 is built; if this is the case, the other behaviour can +be requested via the PCRE2_BSR_UNICODE option. It is also possible to specify +these settings by starting a pattern string with one of the following +sequences: +

+  (*BSR_ANYCRLF)   CR, LF, or CRLF only
+  (*BSR_UNICODE)   any Unicode newline sequence
+
+These override the default and the options given to the compiling function. +Note that these special settings, which are not Perl-compatible, are recognized +only at the very start of a pattern, and that they must be in upper case. If +more than one of them is present, the last one is used. They can be combined +with a change of newline convention; for example, a pattern can start with: +
+  (*ANY)(*BSR_ANYCRLF)
+
+They can also be combined with the (*UTF) or (*UCP) special sequences. Inside a +character class, \R is treated as an unrecognized escape sequence, and causes +an error. +

+
+Unicode character properties +
+

+When PCRE2 is built with Unicode support (the default), three additional escape +sequences that match characters with specific properties are available. They +can be used in any mode, though in 8-bit and 16-bit non-UTF modes these +sequences are of course limited to testing characters whose code points are +less than U+0100 or U+10000, respectively. In 32-bit non-UTF mode, code points +greater than 0x10ffff (the Unicode limit) may be encountered. These are all +treated as being in the Unknown script and with an unassigned type. +

+

+Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \d and \w do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +

+

+The extra escape sequences that provide property support are: +

+  \p{xx}   a character with the xx property
+  \P{xx}   a character without the xx property
+  \X       a Unicode extended grapheme cluster
+
+For compatibility with Perl, negation can be specified by including a +circumflex between the opening brace and the property. For example, \p{^Lu} is +the same as \P{Lu}. +

+

+In accordance with Unicode's "loose matching" rules, ASCII white space +characters, hyphens, and underscores are ignored in the properties represented +by xx above. As well as the space character, ASCII white space can be +tab, linefeed, vertical tab, form feed, or carriage return. +

+

+Some properties are specified as a name only; others as a name and a value, +separated by a colon or an equals sign. The names and values consist of ASCII +letters and digits (with one Perl-specific exception, see below). They are not +case sensitive. Note, however, that the escapes themselves, \p and \P, +are case sensitive. There are abbreviations for many names. The following +examples are all equivalent: +

+  \p{bidiclass=al}
+  \p{BC=al}
+  \p{ Bidi_Class : AL }
+  \p{ Bi-di class = Al }
+  \P{ ^ Bi-di class = Al }
+
+There is support for Unicode script names, Unicode general category properties, +"Any", which matches any character (including newline), Bidi_Class, a number of +binary (yes/no) properties, and some special PCRE2 properties (described +below). +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \P{Any} does not match any characters, so always causes a +match failure. +

+
+Script properties for \p and \P +
+

+There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas +\p{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized and, as for all property specifications, an +equals sign is an alternative to the colon. If a script name is given without a +property type, for example, \p{Adlam}, it is treated as \p{scx:Adlam}. Perl +changed to this interpretation at release 5.26 and PCRE2 changed at release +10.40. +

+

+Unassigned characters (and in non-UTF 32-bit mode, characters with code points +greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not +part of an identified script are lumped together as "Common". The current list +of recognized script names and their 4-character abbreviations can be obtained +by running this command: +

+  pcre2test -LS
+
+
+

+
+The general category property for \p and \P +
+

+Each character has exactly one Unicode general category property, specified by +a two-letter abbreviation. If only one letter is specified with \p or \P, it +includes all the general category properties that start with that letter. In +this case, in the absence of negation, the curly brackets in the escape +sequence are optional; these two examples have the same effect: +

+  \p{L}
+  \pL
+
+The following general category property codes are supported: +
+  C     Other
+  Cc    Control
+  Cf    Format
+  Cn    Unassigned
+  Co    Private use
+  Cs    Surrogate
+
+  L     Letter
+  Lc    Cased letter
+  Ll    Lower case letter
+  Lm    Modifier letter
+  Lo    Other letter
+  Lt    Title case letter
+  Lu    Upper case letter
+
+  M     Mark
+  Mc    Spacing mark
+  Me    Enclosing mark
+  Mn    Non-spacing mark
+
+  N     Number
+  Nd    Decimal number
+  Nl    Letter number
+  No    Other number
+
+  P     Punctuation
+  Pc    Connector punctuation
+  Pd    Dash punctuation
+  Pe    Close punctuation
+  Pf    Final punctuation
+  Pi    Initial punctuation
+  Po    Other punctuation
+  Ps    Open punctuation
+
+  S     Symbol
+  Sc    Currency symbol
+  Sk    Modifier symbol
+  Sm    Mathematical symbol
+  So    Other symbol
+
+  Z     Separator
+  Zl    Line separator
+  Zp    Paragraph separator
+  Zs    Space separator
+
+Perl originally used the name L& for the Lc property. This is still supported +by Perl, but discouraged. PCRE2 also still supports it. This property matches +any character that has the Lu, Ll, or Lt property, in other words, any letter +that is not classified as a modifier or "other". From release 10.45 of PCRE2 +the properties Lu, Ll, and Lt are all treated as Lc when case-independent +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. The +other properties are not affected by caseless matching. +

+

+The Cs (Surrogate) property applies only to characters whose code points are in +the range U+D800 to U+DFFF. These characters are no different to any other +character when PCRE2 is not in UTF mode (using the 16-bit or 32-bit library). +However, they are not valid in Unicode strings and so cannot be tested by PCRE2 +in UTF mode, unless UTF validity checking has been turned off (see the +discussion of PCRE2_NO_UTF_CHECK in the +pcre2api +page). +

+

+The long synonyms for property names that Perl supports (such as \p{Letter}) +are not supported by PCRE2, nor is it permitted to prefix any of these +properties with "Is". +

+

+No character that is in the Unicode table has the Cn (unassigned) property. +Instead, this property is assumed for any code point that is not in the +Unicode table. +

+
+Binary (yes/no) properties for \p and \P +
+

+Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\p and \P, along with their abbreviations, by running this command: +

+  pcre2test -LP
+
+
+

+
+The Bidi_Class property for \p and \P +
+

+

+  \p{Bidi_Class:<class>}   matches a character with the given class
+  \p{BC:<class>}           matches a character with the given class
+
+The recognized classes are: +
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+
+As in all property specifications, an equals sign may be used instead of a +colon and the class names are case-insensitive. Only the short names listed +above are recognized; PCRE2 does not at present support any long alternatives. +

+
+Extended grapheme clusters +
+

+The \X escape matches any number of Unicode characters that form an "extended +grapheme cluster", and treats the sequence as an atomic group +(see below). +Unicode supports various kinds of composite character by giving each character +a grapheme breaking property, and having rules that use these properties to +define the boundaries of extended grapheme clusters. The rules are defined in +Unicode Standard Annex 29, "Unicode Text Segmentation". Unicode 11.0.0 +abandoned the use of some previous properties that had been used for emojis. +Instead it introduced various emoji-specific properties. PCRE2 uses only the +Extended Pictographic property. +

+

+\X always matches at least one character. Then it decides whether to add +additional characters according to the following rules for ending a cluster: +

+

+1. End at the end of the subject string. +

+

+2. Do not end between CR and LF; otherwise end after any control character. +

+

+3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters +are of five types: L, V, T, LV, and LVT. An L character may be followed by an +L, V, LV, or LVT character; an LV or V character may be followed by a V or T +character; an LVT or T character may be followed only by a T character. +

+

+4. Do not end before extending characters or spacing marks or the zero-width +joiner (ZWJ) character. Characters with the "mark" property always have the +"extend" grapheme breaking property. +

+

+5. Do not end after prepend characters. +

+

+6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width +joiner) sequences. An emoji ZWJ sequence consists of a character with the +Extended_Pictographic property, optionally followed by one or more characters +with the Extend property, followed by the ZWJ character, followed by another +Extended_Pictographic character. +

+

+7. Do not break within emoji flag sequences. That is, do not break between +regional indicator (RI) characters if there are an odd number of RI characters +before the break point. +

+

+8. Otherwise, end the cluster. +

+
+PCRE2's additional properties +
+

+As well as the standard Unicode properties described above, PCRE2 supports four +more that make it possible to convert traditional escape sequences such as \w +and \s to use Unicode properties. PCRE2 uses these non-standard, non-Perl +properties internally when PCRE2_UCP is set. However, they may also be used +explicitly. These properties are: +

+  Xan   Any alphanumeric character
+  Xps   Any POSIX space character
+  Xsp   Any Perl space character
+  Xwd   Any Perl "word" character
+
+Xan matches characters that have either the L (letter) or the N (number) +property. Xps matches the characters tab, linefeed, vertical tab, form feed, or +carriage return, and any other character that has the Z (separator) property +(this includes the space character). Xsp is the same as Xps; in PCRE1 it used +to exclude vertical tab, for Perl compatibility, but Perl changed. Xwd matches +the same characters as Xan, plus those that match Mn (non-spacing mark) or Pc +(connector punctuation, which includes underscore). +

+

+There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +excluded. (Universal Character Names are of the form \uHHHH or \UHHHHHHHH +where H is a hexadecimal digit. Note that the Xuc property does not match these +sequences but the characters that they represent.) +

+
+Resetting the match start +
+

+In normal use, the escape sequence \K causes any previously matched characters +not to be included in the final matched sequence that is returned. For example, +the pattern: +

+  foo\Kbar
+
+matches "foobar", but reports that it has matched "bar". \K does not interact +with anchoring in any way. The pattern: +
+  ^foo\Kbar
+
+matches only when the subject begins with "foobar" (in single line mode), +though it again reports the matched string as "bar". This feature is similar to +a lookbehind assertion +(described below), +but the part of the pattern that precedes \K is not constrained to match a +limited number of characters, as is required for a lookbehind assertion. The +use of \K does not interfere with the setting of +captured substrings. +For example, when the pattern +
+  (foo)\Kbar
+
+matches "foobar", the first substring is still set to "foo". +

+

+From version 5.32.0 Perl forbids the use of \K in lookaround assertions. From +release 10.38 PCRE2 also forbids this by default. However, the +PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling +pcre2_compile() to re-enable the previous behaviour. When this option is +set, \K is acted upon when it occurs inside positive assertions, but is +ignored in negative assertions. Note that when a pattern such as (?=ab\K) +matches, the reported start of the match can be greater than the end of the +match. Using \K in a lookbehind assertion at the start of a pattern can also +lead to odd effects. For example, consider this pattern: +

+  (?<=\Kfoo)bar
+
+If the subject is "foobar", a call to pcre2_match() with a starting +offset of 3 succeeds and reports the matching string as "foobar", that is, the +start of the reported match is earlier than where the match started. +

+
+Simple assertions +
+

+The final use of backslash is for certain simple assertions. An assertion +specifies a condition that has to be met at a particular point in a match, +without consuming any characters from the subject string. The use of +groups for more complicated assertions is described +below. +The backslashed assertions are: +

+  \b     matches at a word boundary
+  \B     matches when not at a word boundary
+  \A     matches at the start of the subject
+  \Z     matches at the end of the subject
+          also matches before a newline at the end of the subject
+  \z     matches only at the end of the subject
+  \G     matches at the first matching position in the subject
+
+Inside a character class, \b has a different meaning; it matches the backspace +character. If any other of these assertions appears in a character class, an +"invalid escape sequence" error is generated. +

+

+A word boundary is a position in the subject string where the current character +and the previous character do not both match \w or \W (i.e. one matches +\w and the other matches \W), or the start or end of the string if the +first or last character matches \w, respectively. When PCRE2 is built with +Unicode support, the meanings of \w and \W can be changed by setting the +PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2 +nor Perl has a separate "start of word" or "end of word" metasequence. However, +whatever follows \b normally determines which it is. For example, the fragment +\ba matches "a" at the start of a word. +

+

+The \A, \Z, and \z assertions differ from the traditional circumflex and +dollar (described in the next section) in that they only ever match at the very +start and end of the subject string, whatever options are set. Thus, they are +independent of multiline mode. These three assertions are not affected by the +PCRE2_NOTBOL or PCRE2_NOTEOL options, which affect only the behaviour of the +circumflex and dollar metacharacters. However, if the startoffset +argument of pcre2_match() is non-zero, indicating that matching is to +start at a point other than the beginning of the subject, \A can never match. +The difference between \Z and \z is that \Z matches before a newline at the +end of the string as well as at the very end, whereas \z matches only at the +end. +

+

+The \G assertion is true only when the current matching position is at the +start point of the matching process, as specified by the startoffset +argument of pcre2_match(). It differs from \A when the value of +startoffset is non-zero. By calling pcre2_match() multiple times +with appropriate arguments, you can mimic Perl's /g option, and it is in this +kind of implementation where \G can be useful. +

+

+Note, however, that PCRE2's implementation of \G, being true at the starting +character of the matching process, is subtly different from Perl's, which +defines it as true at the end of the previous match. In Perl, these can be +different when the previously matched string was empty. Because PCRE2 does just +one match at a time, it cannot reproduce this behaviour. +

+

+If all the alternatives of a pattern begin with \G, the expression is anchored +to the starting match position, and the "anchored" flag is set in the compiled +regular expression. +

+
CIRCUMFLEX AND DOLLAR
+

+The circumflex and dollar metacharacters are zero-width assertions. That is, +they test for a particular condition being true without consuming any +characters from the subject string. These two metacharacters are concerned with +matching the starts and ends of lines. If the newline convention is set so that +only the two-character sequence CRLF is recognized as a newline, isolated CR +and LF characters are treated as ordinary data characters, and are not +recognized as newlines. +

+

+Outside a character class, in the default matching mode, the circumflex +character is an assertion that is true only if the current matching point is at +the start of the subject string. If the startoffset argument of +pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circumflex can +never match if the PCRE2_MULTILINE option is unset. Inside a character class, +circumflex has an entirely different meaning +(see below). +

+

+Circumflex need not be the first character of the pattern if a number of +alternatives are involved, but it should be the first thing in each alternative +in which it appears if the pattern is ever to match that branch. If all +possible alternatives start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is said to be an +"anchored" pattern. (There are also other constructs that can cause a pattern +to be anchored.) +

+

+The dollar character is an assertion that is true only if the current matching +point is at the end of the subject string, or immediately before a newline at +the end of the string (by default), unless PCRE2_NOTEOL is set. Note, however, +that it does not actually match the newline. Dollar need not be the last +character of the pattern if a number of alternatives are involved, but it +should be the last item in any branch in which it appears. Dollar has no +special meaning in a character class. +

+

+The meaning of dollar can be changed so that it matches only at the very end of +the string, by setting the PCRE2_DOLLAR_ENDONLY option at compile time. This +does not affect the \Z assertion. +

+

+The meanings of the circumflex and dollar metacharacters are changed if the +PCRE2_MULTILINE option is set. When this is the case, a dollar character +matches before any newlines in the string, as well as at the very end, and a +circumflex matches immediately after internal newlines as well as at the start +of the subject string. It does not match after a newline that ends the string, +for compatibility with Perl. However, this can be changed by setting the +PCRE2_ALT_CIRCUMFLEX option. +

+

+For example, the pattern /^abc$/ matches the subject string "def\nabc" (where +\n represents a newline) in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode because all branches start with +^ are not anchored in multiline mode, and a match for circumflex is possible +when the startoffset argument of pcre2_match() is non-zero. The +PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set. +

+

+When the newline convention (see +"Newline conventions" +above) recognizes the two-character sequence CRLF as a newline, this is +preferred, even if the single characters CR and LF are also recognized as +newlines. For example, if the newline convention is "any", a multiline mode +circumflex matches before "xyz" in the string "abc\r\nxyz" rather than after +CR, even though CR on its own is a valid newline. (It also matches at the very +start of the string, of course.) +

+

+Note that the sequences \A, \Z, and \z can be used to match the start and +end of the subject in both modes, and if all branches of a pattern start with +\A it is always anchored, whether or not PCRE2_MULTILINE is set. +

+
FULL STOP (PERIOD, DOT) AND \N
+

+Outside a character class, a dot in the pattern matches any one character in +the subject string except (by default) a character that signifies the end of a +line. One or more characters may be specified as line terminators (see +"Newline conventions" +above). +

+

+Dot never matches a single line-ending character. When the two-character +sequence CRLF is the only line ending, dot does not match CR if it is +immediately followed by LF, but otherwise it matches all characters (including +isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences +of CR or LF match dot. When all Unicode line endings are being recognized, dot +does not match CR or LF or any of the other line ending characters. +

+

+The behaviour of dot with regard to newlines can be changed. If the +PCRE2_DOTALL option is set, a dot matches any one character, without exception. +If the two-character sequence CRLF is present in the subject string, it takes +two dots to match it. +

+

+The handling of dot is entirely independent of the handling of circumflex and +dollar, the only relationship being that they both involve newlines. Dot has no +special meaning in a character class. +

+

+The escape sequence \N when not followed by an opening brace behaves like a +dot, except that it is not affected by the PCRE2_DOTALL option. In other words, +it matches any character except one that signifies the end of a line. +

+

+When \N is followed by an opening brace it has a different meaning. See the +section entitled +"Non-printing characters" +above for details. Perl also uses \N{name} to specify characters by Unicode +name; PCRE2 does not support this. +

+
MATCHING A SINGLE CODE UNIT
+

+Outside a character class, the escape sequence \C matches any one code unit, +whether or not a UTF mode is set. In the 8-bit library, one code unit is one +byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is a +32-bit unit. Unlike a dot, \C always matches line-ending characters. The +feature is provided in Perl in order to match individual bytes in UTF-8 mode, +but it is unclear how it can usefully be used. +

+

+Because \C breaks up characters into individual code units, matching one unit +with \C in UTF-8 or UTF-16 mode means that the rest of the string may start +with a malformed UTF character. This has undefined results, because PCRE2 +assumes that it is matching character by character in a valid UTF string (by +default it checks the subject string's validity at the start of processing +unless the PCRE2_NO_UTF_CHECK or PCRE2_MATCH_INVALID_UTF option is used). +

+

+An application can lock out the use of \C by setting the +PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to +build PCRE2 with the use of \C permanently disabled. +

+

+PCRE2 does not allow \C to appear in lookbehind assertions +(described below) +in UTF-8 or UTF-16 modes, because this would make it impossible to calculate +the length of the lookbehind. Neither the alternative matching function +pcre2_dfa_match() nor the JIT optimizer supports \C in these UTF modes. +The former gives a match-time error; the latter fails to optimize and so the +match is always run using the interpreter. +

+

+In the 32-bit library, however, \C is always supported (when not explicitly +locked out) because it always matches a single code unit, whether or not UTF-32 +is specified. +

+

+In general, the \C escape sequence is best avoided. However, one way of using +it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks): +

+  (?| (?=[\x00-\x7f])(\C) |
+      (?=[\x80-\x{7ff}])(\C)(\C) |
+      (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
+      (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
+
+In this example, a group that starts with (?| resets the capturing parentheses +numbers in each alternative (see +"Duplicate Group Numbers" +below). The assertions at the start of each branch check the next UTF-8 +character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The +character's individual bytes are then captured by the appropriate number of +\C groups. +

+
SQUARE BRACKETS AND CHARACTER CLASSES
+

+An opening square bracket introduces a character class, terminated by a closing +square bracket. A closing square bracket on its own is not special by default. +If a closing square bracket is required as a member of the class, it should be +the first data character in the class (after an initial circumflex, if present) +or escaped with a backslash. This means that, by default, an empty class cannot +be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing +square bracket at the start does end the (empty) class. +

+

+A character class matches a single character in the subject. A matched +character must be in the set of characters defined by the class, unless the +first character in the class definition is a circumflex, in which case the +subject character must not be in the set defined by the class. If a circumflex +is actually required as a member of the class, ensure it is not the first +character, or escape it with a backslash. +

+

+For example, the character class [aeiou] matches any lower case English vowel, +whereas [^aeiou] matches all other characters. Note that a circumflex is just a +convenient notation for specifying the characters that are in the class by +enumerating those that are not. A class that starts with a circumflex is not an +assertion; it still consumes a character from the subject string, and therefore +it fails to match if the current pointer is at the end of the string. +

+

+Characters in a class may be specified by their code points using \o, \x, or +\N{U+hh..} in the usual way. When caseless matching is set, any letters in a +class represent both their upper case and lower case versions, so for example, +a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not +match "A", whereas a caseful version would. Note that there are two ASCII +characters, K and S, that, in addition to their lower case ASCII equivalents, +are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) +respectively when either PCRE2_UTF or PCRE2_UCP is set. If you do not want +these ASCII/non-ASCII case equivalences, you can suppress them by setting +PCRE2_EXTRA_CASELESS_RESTRICT, either as an option in a compile context, or by +including (*CASELESS_RESTRICT) or (?r) within a pattern. +

+

+Characters that might indicate line breaks are never treated in any special way +when matching character classes, whatever line-ending sequence is in use, and +whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A +class such as [^a] always matches one of these characters. +

+

+The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s, +\S, \v, \V, \w, and \W may appear in a character class, and add the +characters that they match to the class. For example, [\dABCDEF] matches any +hexadecimal digit. In UTF modes, the PCRE2_UCP option affects the meanings of +\d, \s, \w and their upper case partners, just as it does when they appear +outside a character class, as described in the section entitled +"Generic character types" +above. The escape sequence \b has a different meaning inside a character +class; it matches the backspace character. The sequences \B, \R, and \X are +not special inside a character class. Like any other unrecognized escape +sequences, they cause an error. The same is true for \N when not followed by +an opening brace. +

+

+The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, +or immediately after a range. For example, [b-d-z] matches letters in the range +b to d, a hyphen character, or z. +

+

+There is some special treatment for alphabetic ranges in EBCDIC environments; +see the section +"EBCDIC environments" +below. +

+

+Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or before or after a character type escape such as \d or \H. +However, unless the hyphen is the last character in the class, Perl outputs a +warning in its warning mode, as this is most likely a user error. As PCRE2 has +no facility for warning, an error is given in these cases. +

+

+It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of a range, so [W-\]46] is interpreted as a class containing a range +and two other characters. The octal or hexadecimal representation of "]" can +also be used to end a range. +

+

+Ranges normally include all code points between the start and end characters, +inclusive. They can also be used for code points specified numerically, for +example [\000-\037]. Ranges can include any characters that are valid for the +current mode. In any UTF mode, the so-called "surrogate" characters (those +whose code points lie between 0xd800 and 0xdfff inclusive) may not be specified +explicitly by default (the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables +this check). However, ranges such as [\x{d7ff}-\x{e000}], which include the +surrogates, are always permitted. +

+

+If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\xc8-\xcb] matches accented E +characters in both cases. +

+

+A circumflex can conveniently be used with the upper case character types to +specify a more restricted set of characters than the matching lower case type. +For example, the class [^\W_] matches any letter or digit, but not underscore, +whereas [\w] includes underscore. A positive character class should be read as +"something OR something OR ..." and a negative class as "NOT something AND NOT +something AND NOT ...". +

+

+The metacharacters that are recognized in character classes are backslash, +hyphen (when it can be interpreted as specifying a range), circumflex +(only at the start), and the terminating closing square bracket. An opening +square bracket is also special when it can be interpreted as introducing a +POSIX class (see +"Posix character classes" +below), or a special compatibility feature (see +"Compatibility feature for word boundaries" +below). Escaping any non-alphanumeric character in a class turns it into a +literal, whether or not it would otherwise be a metacharacter. +

+
PERL EXTENDED CHARACTER CLASSES
+

+From release 10.45 PCRE2 supports Perl's (?[...]) extended character class +syntax. This can be used to perform set operations such as intersection on +character classes. +

+

+The syntax permitted within (?[...]) is quite different to ordinary character +classes. Inside the extended class, there is an expression syntax consisting of +"atoms", operators, and ordinary parentheses "()" used for grouping. Such +classes always have the Perl /xx modifier (PCRE2 option PCRE2_EXTENDED_MORE) +turned on within them. This means that literal space and tab characters are +ignored everywhere in the class. +

+

+The allowed atoms are individual characters specified by escape sequences such +as \n or \x{123}, character types such as \d, POSIX classes such as +[:alpha:], and nested ordinary (non-extended) character classes. For example, +in (?[\d & [...]]) the nested class [...] follows the usual rules for ordinary +character classes, in which parentheses are not metacharacters, and character +literals and ranges are permitted. +

+

+Character literals and ranges may not appear outside a nested ordinary +character class because they are not atoms in the extended syntax. The extended +syntax does not introduce any additional escape sequences, so (?[\y]) is an +unknown escape, as it would be in [\y]. +

+

+In the extended syntax, ^ does not negate a class (except within an +ordinary class nested inside an extended class); it is instead a binary +operator. +

+

+The binary operators are "&" (intersection), "|" or "+" (union), "-" +(subtraction) and "^" (symmetric difference). These are left-associative and +"&" has higher (tighter) precedence, while the others have equal lower +precedence. The one prefix unary operator is "!" (complement), with highest +precedence. +

+
UTS#18 EXTENDED CHARACTER CLASSES
+

+The PCRE2_ALT_EXTENDED_CLASS option enables an alternative to Perl's (?[...]) +syntax, allowing instead extended class behaviour inside ordinary [...] +character classes. This altered syntax for [...] classes is loosely described +by the Unicode standard UTS#18. The PCRE2_ALT_EXTENDED_CLASS option does not +prevent use of (?[...]) classes; it just changes the meaning of all +[...] classes that are not nested inside a Perl (?[...]) class. +

+

+Firstly, in ordinary Perl [...] syntax, an expression such as "[a[]" is a +character class with two literal characters "a" and "[", but in UTS#18 extended +classes the "[" character becomes an additional metacharacter within classes, +denoting the start of a nested class, so a literal "[" must be escaped as "\[". +

+

+Secondly, within the UTS#18 extended syntax, there are operators "||", "&&", +"--" and "~~" which denote character class union, intersection, subtraction, +and symmetric difference respectively. In standard Perl syntax, these would +simply be needlessly-repeated literals (except for "--" which could be the +start or end of a range). In UTS#18 extended classes these operators can be used +in constructs such as [\p{L}--[QW]] for "Unicode letters, other than Q and W". +A literal "-" at the start or end of a range must be escaped, so while "[--1]" +in Perl syntax is the range from hyphen to "1", it must be escaped as "[\--1]" +in UTS#18 extended classes. +

+

+Unlike Perl's (?[...]) extended classes, the PCRE2_EXTENDED_MORE option to +ignore space and tab characters is not automatically enabled for UTS#18 +extended classes, but it is honoured if set. +

+

+Extended UTS#18 classes can be nested, and nested classes are themselves +extended classes (unlike Perl, where nested classes must be simple classes). +For example, [\p{L}&&[\p{Thai}||\p{Greek}]] matches any letter that is in +the Thai or Greek scripts. Note that this means that no special grouping +characters (such as the parentheses used in Perl's (?[...]) class syntax) are +needed. +

+

+Individual class items (literal characters, literal ranges, properties such as +\d or \p{...}, and nested classes) can be combined by juxtaposition or by an +operator. Juxtaposition is the implicit union operator, and binds more tightly +than any explicit operator. Thus a sequence of literals and/or ranges behaves +as if it is enclosed in square brackets. For example, [A-Z0-9&&[^E8]] is the +same as [[A-Z0-9]&&[^E8]], which matches any upper case alphanumeric character +except "E" or "8". +

+

+Precedence between the explicit operators is not defined, so mixing operators +is a syntax error. For example, [A&&B--C] is an error, but [A&&[B--C]] is +valid. +

+

+This is an emerging syntax which is being adopted gradually across the regex +ecosystem: for example JavaScript adopted the "/v" flag in ECMAScript 2024; +Python's "re" module reserves the syntax for future use with a FutureWarning +for unescaped use of "[" as a literal within character classes. Due to UTS#18 +providing insufficient guidance, engines interpret the syntax differently. +Rust's "regex" crate and Python's "regex" PyPI module both implement UTS#18 +extended classes, but with slight incompatibilities ([A||B&&C] is parsed as +[A||[B&&C]] in Python's "regex" but as [[A||B]&&C] in Rust's "regex"). +

+

+PCRE2's syntax adds syntax restrictions similar to ECMAScript's /v flag, so +that all the UTS#18 extended classes accepted as valid by PCRE2 have the +property that they are interpreted either with the same behaviour, or as +invalid, by all other major engines. Please file an issue if you are aware of +cross-engine differences in behaviour between PCRE2 and another major engine. +

+
POSIX CHARACTER CLASSES
+

+Perl supports the POSIX notation for character classes. This uses names +enclosed by [: and :] within the enclosing square brackets. PCRE2 also supports +this notation, in both ordinary and extended classes. For example, +

+  [01[:alpha:]%]
+
+matches "0", "1", any alphabetic character, or "%". The supported class names +are: +
+  alnum    letters and digits
+  alpha    letters
+  ascii    character codes 0 - 127
+  blank    space or tab only
+  cntrl    control characters
+  digit    decimal digits (same as \d)
+  graph    printing characters, excluding space
+  lower    lower case letters
+  print    printing characters, including space
+  punct    printing characters, excluding letters and digits and space
+  space    white space (the same as \s from PCRE1 8.34)
+  upper    upper case letters
+  word     "word" characters (same as \w)
+  xdigit   hexadecimal digits
+
+The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), +and space (32). If locale-specific matching is taking place, the list of space +characters may be different; there may be fewer or more of them. "Space" and +\s match the same set of characters, as do "word" and \w. +

+

+The name "word" is a Perl extension, and "blank" is a GNU extension from Perl +5.8. Another Perl extension is negation, which is indicated by a ^ character +after the colon. For example, +

+  [12[:^digit:]]
+
+matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the POSIX +syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not +supported, and an error is given if they are encountered. +

+

+By default, characters with values greater than 127 do not match any of the +POSIX character classes, although this may be different for characters in the +range 128-255 when locale-specific matching is happening. However, in UCP mode, +unless certain options are set (see below), some of the classes are changed so +that Unicode character properties are used. This is achieved by replacing +POSIX classes with other sequences, as follows: +

+  [:alnum:]  becomes  \p{Xan}
+  [:alpha:]  becomes  \p{L}
+  [:blank:]  becomes  \h
+  [:cntrl:]  becomes  \p{Cc}
+  [:digit:]  becomes  \p{Nd}
+  [:lower:]  becomes  \p{Ll}
+  [:space:]  becomes  \p{Xps}
+  [:upper:]  becomes  \p{Lu}
+  [:word:]   becomes  \p{Xwd}
+
+Negated versions, such as [:^alpha:], use \P instead of \p. Four other POSIX +classes are handled specially in UCP mode: +

+

+[:graph:] +This matches characters that have glyphs that mark the page when printed. In +Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf +properties, except for: +

+  U+061C           Arabic Letter Mark
+  U+180E           Mongolian Vowel Separator
+  U+2066 - U+2069  Various "isolate"s
+
+
+

+

+[:print:] +This matches the same characters as [:graph:] plus space characters that are +not controls, that is, characters with the Zs property. +

+

+[:punct:] +This matches all characters that have the Unicode P (punctuation) property, +plus those characters with code points less than 256 that have the S (Symbol) +property. +

+

+[:xdigit:] +In addition to the ASCII hexadecimal digits, this also matches the "fullwidth" +versions of those characters, whose Unicode code points start at U+FF10. This +is a change that was made in PCRE2 release 10.43 for Perl compatibility. +

+

+The other POSIX classes are unchanged by PCRE2_UCP, and match only characters +with code points less than 256. +

+

+There are two options that can be used to restrict the POSIX classes to ASCII +characters when PCRE2_UCP is set. The option PCRE2_EXTRA_ASCII_DIGIT affects +just [:digit:] and [:xdigit:]. Within a pattern, this can be set and unset by +(?aT) and (?-aT). The PCRE2_EXTRA_ASCII_POSIX option disables UCP processing +for all POSIX classes, including [:digit:] and [:xdigit:]. Within a pattern, +(?aP) and (?-aP) set and unset both these options for consistency. +

+
COMPATIBILITY FEATURE FOR WORD BOUNDARIES
+

+In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly +syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of +word". PCRE2 treats these items as follows: +

+  [[:<:]]  is converted to  \b(?=\w)
+  [[:>:]]  is converted to  \b(?<=\w)
+
+Only these exact character sequences are recognized. A sequence such as +[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is +not compatible with Perl. It is provided to help migrations from other +environments, and is best not used in any new patterns. Note that \b matches +at the start and the end of a word (see +"Simple assertions" +above), and in a Perl-style pattern the preceding or following character +normally shows which is wanted, without the need for the assertions that are +used above in order to give exactly the POSIX behaviour. Note also that the +PCRE2_UCP option changes the meaning of \w (and therefore \b) by default, so +it also affects these POSIX sequences. +

+
VERTICAL BAR
+

+Vertical bar characters are used to separate alternative patterns. For example, +the pattern +

+  gilbert|sullivan
+
+matches either "gilbert" or "sullivan". Any number of alternatives may appear, +and an empty alternative is permitted (matching the empty string). The matching +process tries each alternative in turn, from left to right, and the first one +that succeeds is used. If the alternatives are within a group +(defined below), +"succeeds" means matching the rest of the main pattern as well as the +alternative in the group. +

+
INTERNAL OPTION SETTING
+

+The settings of several options can be changed within a pattern by a sequence +of letters enclosed between "(?" and ")". The following are Perl-compatible, +and are described in detail in the +pcre2api +documentation. The option letters are: +

+  i  for PCRE2_CASELESS
+  m  for PCRE2_MULTILINE
+  n  for PCRE2_NO_AUTO_CAPTURE
+  s  for PCRE2_DOTALL
+  x  for PCRE2_EXTENDED
+  xx for PCRE2_EXTENDED_MORE
+
+For example, (?im) sets caseless, multiline matching. It is also possible to +unset these options by preceding the relevant letters with a hyphen, for +example (?-im). The two "extended" options are not independent; unsetting +either one cancels the effects of both of them. +

+

+A combined setting and unsetting such as (?im-sx), which sets PCRE2_CASELESS +and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and PCRE2_EXTENDED, is also +permitted. Only one hyphen may appear in the options string. If a letter +appears both before and after the hyphen, the option is unset. An empty options +setting "(?)" is allowed. Needless to say, it has no effect. +

+

+If the first character following (? is a circumflex, it causes all of the above +options to be unset. Letters may follow the circumflex to cause some options to +be re-instated, but a hyphen may not appear. +

+

+Some PCRE2-specific options can be changed by the same mechanism using these +pairs or individual letters: +

+  aD for PCRE2_EXTRA_ASCII_BSD
+  aS for PCRE2_EXTRA_ASCII_BSS
+  aW for PCRE2_EXTRA_ASCII_BSW
+  aP for PCRE2_EXTRA_ASCII_POSIX and PCRE2_EXTRA_ASCII_DIGIT
+  aT for PCRE2_EXTRA_ASCII_DIGIT
+  r  for PCRE2_EXTRA_CASELESS_RESTRICT
+  J  for PCRE2_DUPNAMES
+  U  for PCRE2_UNGREEDY
+
+However, except for 'r', these are not unset by (?^), which is equivalent to +(?-imnrsx). If 'a' is not followed by any of the upper case letters shown +above, it sets (or unsets) all the ASCII options. +

+

+PCRE2_EXTRA_ASCII_DIGIT has no additional effect when PCRE2_EXTRA_ASCII_POSIX +is set, but including it in (?aP) means that (?-aP) suppresses all ASCII +restrictions for POSIX classes. +

+

+When one of these option changes occurs at top level (that is, not inside group +parentheses), the change applies until a subsequent change, or the end of the +pattern. An option change within a group (see below for a description of +groups) affects only that part of the group that follows it. At the end of the +group these options are reset to the state they were before the group. For +example, +

+  (a(?i)b)c
+
+matches abc and aBc and no other strings (assuming PCRE2_CASELESS is not set +externally). Any changes made in one alternative do carry on into subsequent +branches within the same group. For example, +
+  (a(?i)b|c)
+
+matches "ab", "aB", "c", and "C", even though when matching "C" the first +branch is abandoned before the option setting. This is because the effects of +option settings happen at compile time. There would be some very weird +behaviour otherwise. +

+

+As a convenient shorthand, if any option settings are required at the start of +a non-capturing group (see the next section), the option letters may +appear between the "?" and the ":". Thus the two patterns +

+  (?i:saturday|sunday)
+  (?:(?i)saturday|sunday)
+
+match exactly the same set of strings. +

+

+Note: There are other PCRE2-specific options, applying to the whole +pattern, which can be set by the application when the compiling function is +called. In addition, the pattern can contain special leading sequences such as +(*CRLF) to override what the application has set or what has been defaulted. +Details are given in the section entitled +"Newline sequences" +above. There are also the (*UTF) and (*UCP) leading sequences that can be used +to set UTF and Unicode property modes; they are equivalent to setting the +PCRE2_UTF and PCRE2_UCP options, respectively. However, the application can set +the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, which lock out the use of the +(*UTF) and (*UCP) sequences. +

+
GROUPS
+

+Groups are delimited by parentheses (round brackets), which can be nested. +Turning part of a pattern into a group does two things: +
+
+1. It localizes a set of alternatives. For example, the pattern +

+  cat(aract|erpillar|)
+
+matches "cataract", "caterpillar", or "cat". Without the parentheses, it would +match "cataract", "erpillar" or an empty string. +
+
+2. It creates a "capture group". This means that, when the whole pattern +matches, the portion of the subject string that matched the group is passed +back to the caller, separately from the portion that matched the whole pattern. +(This applies only to the traditional matching function; the DFA matching +function does not support capturing.) +

+

+Opening parentheses are counted from left to right (starting from 1) to obtain +numbers for capture groups. For example, if the string "the red king" is +matched against the pattern +

+  the ((red|white) (king|queen))
+
+the captured substrings are "red king", "red", and "king", and are numbered 1, +2, and 3, respectively. +

+

+The fact that plain parentheses fulfil two functions is not always helpful. +There are often times when grouping is required without capturing. If an +opening parenthesis is followed by a question mark and a colon, the group +does not do any capturing, and is not counted when computing the number of any +subsequent capture groups. For example, if the string "the white queen" +is matched against the pattern +

+  the ((?:red|white) (king|queen))
+
+the captured substrings are "white queen" and "queen", and are numbered 1 and +2. The maximum number of capture groups is 65535. +

+

+As a convenient shorthand, if any option settings are required at the start of +a non-capturing group, the option letters may appear between the "?" and the +":". Thus the two patterns +

+  (?i:saturday|sunday)
+  (?:(?i)saturday|sunday)
+
+match exactly the same set of strings. Because alternative branches are tried +from left to right, and options are not reset until the end of the group is +reached, an option setting in one branch does affect subsequent branches, so +the above patterns match "SUNDAY" as well as "Saturday". +

+
DUPLICATE GROUP NUMBERS
+

+Perl 5.10 introduced a feature whereby each alternative in a group uses the +same numbers for its capturing parentheses. Such a group starts with (?| and is +itself a non-capturing group. For example, consider this pattern: +

+  (?|(Sat)ur|(Sun))day
+
+Because the two alternatives are inside a (?| group, both sets of capturing +parentheses are numbered one. Thus, when the pattern matches, you can look +at captured substring number one, whichever alternative matched. This construct +is useful when you want to capture part, but not all, of one of a number of +alternatives. Inside a (?| group, parentheses are numbered as usual, but the +number is reset at the start of each branch. The numbers of any capturing +parentheses that follow the whole group start after the highest number used in +any branch. The following example is taken from the Perl documentation. The +numbers underneath show in which buffer the captured content will be stored. +
+  # before  ---------------branch-reset----------- after
+  / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+  # 1            2         2  3        2     3     4
+
+A backreference to a capture group uses the most recent value that is set for +the group. The following pattern matches "abcabc" or "defdef": +
+  /(?|(abc)|(def))\1/
+
+In contrast, a subroutine call to a capture group always refers to the +first one in the pattern with the given number. The following pattern matches +"abcabc" or "defabc": +
+  /(?|(abc)|(def))(?1)/
+
+A relative reference such as (?-1) is no different: it is just a convenient way +of computing an absolute group number. +

+

+If a +condition test +for a group's having matched refers to a non-unique number, the test is +true if any group with that number has matched. +

+

+An alternative approach to using this "branch reset" feature is to use +duplicate named groups, as described in the next section. +

+
NAMED CAPTURE GROUPS
+

+Identifying capture groups by number is simple, but it can be very hard to keep +track of the numbers in complicated patterns. Furthermore, if an expression is +modified, the numbers may change. To help with this difficulty, PCRE2 supports +the naming of capture groups. This feature was not added to Perl until release +5.10. Python had the feature earlier, and PCRE1 introduced it at release 4.0, +using the Python syntax. PCRE2 supports both the Perl and the Python syntax. +

+

+In PCRE2, a capture group can be named in one of three ways: (?<name>...) or +(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 128 +code units long. When PCRE2_UTF is not set, they may contain only ASCII +alphanumeric characters and underscores, but must start with a non-digit. When +PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode +letter or Unicode decimal digit. In other words, group names must match one of +these patterns: +

+  ^[_A-Za-z][_A-Za-z0-9]*\z   when PCRE2_UTF is not set
+  ^[_\p{L}][_\p{L}\p{Nd}]*\z  when PCRE2_UTF is set
+
+References to capture groups from other parts of the pattern, such as +backreferences, +recursion, +and +conditions, +can all be made by name as well as by number. +

+

+Named capture groups are allocated numbers as well as names, exactly as +if the names were not present. In both PCRE2 and Perl, capture groups +are primarily identified by numbers; any names are just aliases for these +numbers. The PCRE2 API provides function calls for extracting the complete +name-to-number translation table from a compiled pattern, as well as +convenience functions for extracting captured substrings by name. +

+

+Warning: When more than one capture group has the same number, as +described in the previous section, a name given to one of them applies to all +of them. Perl allows identically numbered groups to have different names. +Consider this pattern, where there are two capture groups, both numbered 1: +

+  (?|(?<AA>aa)|(?<BB>bb))
+
+Perl allows this, with both names AA and BB as aliases of group 1. Thus, after +a successful match, both names yield the same value (either "aa" or "bb"). +

+

+In an attempt to reduce confusion, PCRE2 does not allow the same group number +to be associated with more than one name. The example above provokes a +compile-time error. However, there is still scope for confusion. Consider this +pattern: +

+  (?|(?<AA>aa)|(bb))
+
+Although the second group number 1 is not explicitly named, the name AA is +still an alias for any group 1. Whether the pattern matches "aa" or "bb", a +reference by name to group AA yields the matched string. +

+

+By default, a name must be unique within a pattern, except that duplicate names +are permitted for groups with the same number, for example: +

+  (?|(?<AA>aa)|(?<AA>bb))
+
+The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES +option at compile time, or by the use of (?J) within the pattern, as described +in the section entitled +"Internal Option Setting" +above. +

+

+Duplicate names can be useful for patterns where only one instance of the named +capture group can match. Suppose you want to match the name of a weekday, +either as a 3-letter abbreviation or as the full name, and in both cases you +want to extract the abbreviation. This pattern (ignoring the line breaks) does +the job: +

+  (?J)
+  (?<DN>Mon|Fri|Sun)(?:day)?|
+  (?<DN>Tue)(?:sday)?|
+  (?<DN>Wed)(?:nesday)?|
+  (?<DN>Thu)(?:rsday)?|
+  (?<DN>Sat)(?:urday)?
+
+There are five capture groups, but only one is ever set after a match. The +convenience functions for extracting the data by name return the substring for +the first (and in this example, the only) group of that name that matched. This +saves searching to find which numbered group it was. (An alternative way of +solving this problem is to use a "branch reset" group, as described in the +previous section.) +

+

+If you make a backreference to a non-unique named group from elsewhere in the +pattern, the groups to which the name refers are checked in the order in which +they appear in the overall pattern. The first one that is set is used for the +reference. For example, this pattern matches both "foofoo" and "barbar" but not +"foobar" or "barfoo": +

+  (?J)(?:(?<n>foo)|(?<n>bar))\k<n>
+
+
+

+

+If you make a subroutine call to a non-unique named group, the one that +corresponds to the first occurrence of the name is used. In the absence of +duplicate numbers this is the one with the lowest number. +

+

+If you use a named reference in a condition +test (see the +section about conditions +below), either to check whether a capture group has matched, or to check for +recursion, all groups with the same name are tested. If the condition is true +for any one of them, the overall condition is true. This is the same behaviour +as testing by number. For further details of the interfaces for handling named +capture groups, see the +pcre2api +documentation. +

+
REPETITION
+

+Repetition is specified by quantifiers, which may follow any one of these +items: +

+  a literal data character
+  the dot metacharacter
+  the \C escape sequence
+  the \R escape sequence
+  the \X escape sequence
+  any escape sequence that matches a single character
+  a character class
+  a backreference
+  a parenthesized group (including lookaround assertions)
+  a subroutine call (recursive or otherwise)
+
+If a quantifier does not follow a repeatable item, an error occurs. The +general repetition quantifier specifies a minimum and maximum number of +permitted matches by giving two numbers in curly brackets (braces), separated +by a comma. The numbers must be less than 65536, and the first must be less +than or equal to the second. For example, +
+  z{2,4}
+
+matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special +character. If the second number is omitted, but the comma is present, there is +no upper limit; if the second number and the comma are both omitted, the +quantifier specifies an exact number of required matches. Thus +
+  [aeiou]{3,}
+
+matches at least 3 successive vowels, but may match many more, whereas +
+  \d{8}
+
+matches exactly 8 digits. If the first number is omitted, the lower limit is +taken as zero; in this case the upper limit must be present. +
+  X{,4} is interpreted as X{0,4}
+
+This is a change in behaviour that happened in Perl 5.34.0 and PCRE2 10.43. In +earlier versions such a sequence was not interpreted as a quantifier. Other +regular expression engines may behave either way. +

+

+If the characters that follow an opening brace do not match the syntax of a +quantifier, the brace is taken as a literal character. In particular, this +means that {,} is a literal string of three characters. +

+

+Note that not every opening brace is potentially the start of a quantifier +because braces are used in other items such as \N{U+345} or \k{name}. +

+

+In UTF modes, quantifiers apply to characters rather than to individual code +units. Thus, for example, \x{100}{2} matches two characters, each of +which is represented by a two-byte sequence in a UTF-8 string. Similarly, +\X{3} matches three Unicode extended grapheme clusters, each of which may be +several code units long (and they may be of different lengths). +

+

+The quantifier {0} is permitted, causing the expression to behave as if the +previous item and the quantifier were not present. This may be useful for +capture groups that are referenced as +subroutines +from elsewhere in the pattern (but see also the section entitled +"Defining capture groups for use by reference only" +below). Except for parenthesized groups, items that have a {0} quantifier are +omitted from the compiled pattern. +

+

+For convenience, the three most common quantifiers have single-character +abbreviations: +

+  *    is equivalent to {0,}
+  +    is equivalent to {1,}
+  ?    is equivalent to {0,1}
+
+It is possible to construct infinite loops by following a group that can match +no characters with a quantifier that has no upper limit, for example: +
+  (a?)*
+
+Earlier versions of Perl and PCRE1 used to give an error at compile time for +such patterns. However, because there are cases where this can be useful, such +patterns are now accepted, but whenever an iteration of such a group matches no +characters, matching moves on to the next item in the pattern instead of +repeatedly matching an empty string. This does not prevent backtracking into +any of the iterations if a subsequent item fails to match. +

+

+By default, quantifiers are "greedy", that is, they match as much as possible +(up to the maximum number of permitted repetitions), without causing the rest +of the pattern to fail. The classic example of where this gives problems is in +trying to match comments in C programs. These appear between /* and */ and +within the comment, individual * and / characters may appear. An attempt to +match C comments by applying the pattern +

+  /\*.*\*/
+
+to the string +
+  /* first comment */  not comment  /* second comment */
+
+fails, because it matches the entire string owing to the greediness of the .* +item. However, if a quantifier is followed by a question mark, it ceases to be +greedy, and instead matches the minimum number of times possible, so the +pattern +
+  /\*.*?\*/
+
+does the right thing with C comments. The meaning of the various quantifiers is +not otherwise changed, just the preferred number of matches. Do not confuse +this use of question mark with its use as a quantifier in its own right. +Because it has two uses, it can sometimes appear doubled, as in +
+  \d??\d
+
+which matches one digit by preference, but can match two if that is the only +way the rest of the pattern matches. +

+

+If the PCRE2_UNGREEDY option is set (an option that is not available in Perl), +the quantifiers are not greedy by default, but individual ones can be made +greedy by following them with a question mark. In other words, it inverts the +default behaviour. +

+

+When a parenthesized group is quantified with a minimum repeat count that +is greater than 1 or with a limited maximum, more memory is required for the +compiled pattern, in proportion to the size of the minimum or maximum. +

+

+If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option (equivalent +to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is +implicitly anchored, because whatever follows will be tried against every +character position in the subject string, so there is no point in retrying the +overall match at any position after the first. PCRE2 normally treats such a +pattern as though it were preceded by \A. +

+

+In cases where it is known that the subject string contains no newlines, it is +worth setting PCRE2_DOTALL in order to obtain this optimization, or +alternatively, using ^ to indicate anchoring explicitly. +

+

+However, there are some cases where the optimization cannot be used. When .* +is inside capturing parentheses that are the subject of a backreference +elsewhere in the pattern, a match at the start may fail where a later one +succeeds. Consider, for example: +

+  (.*)abc\1
+
+If the subject is "xyz123abc123" the match point is the fourth character. For +this reason, such a pattern is not implicitly anchored. +

+

+Another case where implicit anchoring is not applied is when the leading .* is +inside an atomic group. Once again, a match at the start may fail where a later +one succeeds. Consider this pattern: +

+  (?>.*?a)b
+
+It matches "ab" in the subject "aab". The use of the backtracking control verbs +(*PRUNE) and (*SKIP) also disables this optimization. To do so explicitly, +either pass the compile option PCRE2_NO_DOTSTAR_ANCHOR, or call +pcre2_set_optimize() with a PCRE2_DOTSTAR_ANCHOR_OFF directive. +

+

+When a capture group is repeated, the value captured is the substring that +matched the final iteration. For example, after +

+  (tweedle[dume]{3}\s*)+
+
+has matched "tweedledum tweedledee" the value of the captured substring is +"tweedledee". However, if there are nested capture groups, the corresponding +captured values may have been set in previous iterations. For example, after +
+  (a|(b))+
+
+matches "aba" the value of the second captured substring is "b". +

+
ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
+

+With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") +repetition, failure of what follows normally causes the repeated item to be +re-evaluated to see if a different number of repeats allows the rest of the +pattern to match. Sometimes it is useful to prevent this, either to change the +nature of the match, or to cause it to fail earlier than it otherwise might, when +the author of the pattern knows there is no point in carrying on. +

+

+Consider, for example, the pattern \d+foo when applied to the subject line +

+  123456bar
+
+After matching all 6 digits and then failing to match "foo", the normal +action of the matcher is to try again with only 5 digits matching the \d+ +item, and then with 4, and so on, before ultimately failing. "Atomic grouping" +(a term taken from Jeffrey Friedl's book) provides the means for specifying +that once a group has matched, it is not to be re-evaluated in this way. +

+

+If we use atomic grouping for the previous example, the matcher gives up +immediately on failing to match "foo" the first time. The notation is a kind of +special parenthesis, starting with (?> as in this example: +

+  (?>\d+)foo
+
+Perl 5.28 introduced an experimental alphabetic form starting with (* which may +be easier to remember: +
+  (*atomic:\d+)foo
+
+This kind of parenthesized group "locks up" the part of the pattern it contains +once it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal. +

+

+An alternative description is that a group of this type matches exactly the +string of characters that an identical standalone pattern would match, if +anchored at the current point in the subject string. +

+

+Atomic groups are not capture groups. Simple cases such as the above example +can be thought of as a maximizing repeat that must swallow everything it can. +So, while both \d+ and \d+? are prepared to adjust the number of digits they +match in order to make the rest of the pattern match, (?>\d+) can only match +an entire sequence of digits. +

+

+Atomic groups in general can of course contain arbitrarily complicated +expressions, and can be nested. However, when the contents of an atomic +group is just a single repeated item, as in the example above, a simpler +notation, called a "possessive quantifier" can be used. This consists of an +additional + character following a quantifier. Using this notation, the +previous example can be rewritten as +

+  \d++foo
+
+Note that a possessive quantifier can be used with an entire group, for +example: +
+  (abc|xyz){2,3}+
+
+Possessive quantifiers are always greedy; the setting of the PCRE2_UNGREEDY +option is ignored. They are a convenient notation for the simpler forms of +atomic group. However, there is no difference in the meaning of a possessive +quantifier and the equivalent atomic group, though there may be a performance +difference; possessive quantifiers should be slightly faster. +

+

+The possessive quantifier syntax is an extension to the Perl 5.8 syntax. +Jeffrey Friedl originated the idea (and the name) in the first edition of his +book. Mike McCloskey liked it, so implemented it when he built Sun's Java +package, and PCRE1 copied it from there. It found its way into Perl at release +5.10. +

+

+PCRE2 has an optimization that automatically "possessifies" certain simple +pattern constructs. For example, the sequence A+B is treated as A++B because +there is no point in backtracking into a sequence of A's when B must follow. +This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, by calling +pcre2_set_optimize() with a PCRE2_AUTO_POSSESS_OFF directive, or by +starting the pattern with (*NO_AUTO_POSSESS). +

+

+When a pattern contains an unlimited repeat inside a group that can itself be +repeated an unlimited number of times, the use of an atomic group is the only +way to avoid some failing matches taking a very long time indeed. The pattern +

+  (\D+|<\d+>)*[!?]
+
+matches an unlimited number of substrings that either consist of non-digits, or +digits enclosed in <>, followed by either ! or ?. When it matches, it runs +quickly. However, if it is applied to +
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+it takes a long time before reporting failure. This is because the string can +be divided between the internal \D+ repeat and the external * repeat in a +large number of ways, and all have to be tried. (The example uses [!?] rather +than a single character at the end, because both PCRE2 and Perl have an +optimization that allows for fast failure when a single character is used. They +remember the last single character that is required for a match, and fail early +if it is not present in the string.) If the pattern is changed so that it uses +an atomic group, like this: +
+  ((?>\D+)|<\d+>)*[!?]
+
+sequences of non-digits cannot be broken, and failure happens quickly. +

+
BACKREFERENCES
+

+Outside a character class, a backslash followed by a digit greater than 0 (and +possibly further digits) is a backreference to a capture group earlier (that +is, to its left) in the pattern, provided there have been that many previous +capture groups. +

+

+However, if the decimal number following the backslash is less than 8, it is +always taken as a backreference, and causes an error only if there are not that +many capture groups in the entire pattern. In other words, the group that is +referenced need not be to the left of the reference for numbers less than 8. A +"forward backreference" of this type can make sense when a repetition is +involved and the group to the right has participated in an earlier iteration. +

+

+It is not possible to have a numerical "forward backreference" to a group whose +number is 8 or more using this syntax because a sequence such as \50 is +interpreted as a character defined in octal. See the subsection entitled +"Non-printing characters" +above +for further details of the handling of digits following a backslash. Other +forms of backreferencing do not suffer from this restriction. In particular, +there is no problem when named capture groups are used (see below). +

+

+Another way of avoiding the ambiguity inherent in the use of digits following a +backslash is to use the \g escape sequence. This escape must be followed by a +signed or unsigned number, optionally enclosed in braces. These examples are +all identical: +

+  (ring), \1
+  (ring), \g1
+  (ring), \g{1}
+
+An unsigned number specifies an absolute reference without the ambiguity that +is present in the older syntax. It is also useful when literal digits follow +the reference. A signed number is a relative reference. Consider this example: +
+  (abc(def)ghi)\g{-1}
+
+The sequence \g{-1} is a reference to the capture group whose number is one +less than the number of the next group to be started, so in this example (where +the next group would be numbered 3) it is equivalent to \2, and \g{-2} would +be equivalent to \1. Note that if this construct is inside a capture group, +that group is included in the count, so in this example \g{-2} also refers to +group 1: +
+  (A)(\g{-2}B)
+
+The use of relative references can be helpful in long patterns, and also in +patterns that are created by joining together fragments that contain references +within themselves. +

+

+The sequence \g{+1} is a reference to the next capture group that is started +after this item, and \g{+2} refers to the one after that, and so on. This kind +of forward reference can be useful in patterns that repeat. Perl does not +support the use of + in this way. +

+

+A backreference matches whatever actually most recently matched the capture +group in the current subject string, rather than anything at all that matches +the group (see +"Groups as subroutines" +below for a way of doing that). So the pattern +

+  (sens|respons)e and \1ibility
+
+matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If caseful matching is in force at the time of the +backreference, the case of letters is relevant. For example, +
+  ((?i)rah)\s+\1
+
+matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original +capture group is matched caselessly. +

+

+There are several different ways of writing backreferences to named capture +groups. The .NET syntax is \k{name}, the Python syntax is (?P=name), and the +original Perl syntax is \k<name> or \k'name'. All of these are now supported +by both Perl and PCRE2. Perl 5.10's unified backreference syntax, in which \g +can be used for both numeric and named references, is also supported by PCRE2. +We could rewrite the above example in any of the following ways: +

+  (?<p1>(?i)rah)\s+\k<p1>
+  (?'p1'(?i)rah)\s+\k{p1}
+  (?P<p1>(?i)rah)\s+(?P=p1)
+  (?<p1>(?i)rah)\s+\g{p1}
+
+A capture group that is referenced by name may appear in the pattern before or +after the reference. +

+

+There may be more than one backreference to the same group. If a group has not +actually been used in a particular match, backreferences to it always fail by +default. For example, the pattern +

+  (a|(bc))\2
+
+always fails if it starts to match "a" rather than "bc". However, if the +PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an +unset value matches an empty string. +

+

+Because there may be many capture groups in a pattern, all digits following a +backslash are taken as part of a potential backreference number. If the pattern +continues with a digit character, some delimiter must be used to terminate the +backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this +can be white space. Otherwise, the \g{} syntax or an empty comment (see +"Comments" +below) can be used. +

+
+Recursive backreferences +
+

+A backreference that occurs inside the group to which it refers fails when the +group is first used, so, for example, (a\1) never matches. However, such +references can be useful inside repeated groups. For example, the pattern +

+  (a|b\1)+
+
+matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of +the group, the backreference matches the character string corresponding to the +previous iteration. In order for this to work, the pattern must be such that +the first iteration does not need to match the backreference. This can be done +using alternation, as in the example above, or by a quantifier with a minimum +of zero. +

+

+For versions of PCRE2 less than 10.25, backreferences of this type used to +cause the group that they reference to be treated as an +atomic group. +This restriction no longer applies, and backtracking into such groups can occur +as normal. +

+
ASSERTIONS
+

+An assertion is a test that does not consume any characters. The test must +succeed for the match to continue. The simple assertions coded as \b, \B, +\A, \G, \Z, \z, ^ and $ are described +above. +

+

+More complicated assertions are coded as parenthesized groups. If matching such +a group succeeds, matching continues after it, but with the matching position +in the subject string reset to what it was before the assertion was processed. +

+

+A special kind of assertion, called a "scan substring" assertion, matches a +subpattern against a previously captured substring. This is described in the +section entitled +"Scan substring assertions" +below. It is a PCRE2 extension, not compatible with Perl. +

+

+The other group-based assertions are of two kinds: those that look ahead of the +current position in the subject string, and those that look behind it, and in +each case an assertion may be positive (must match for the assertion to be +true) or negative (must not match for the assertion to be true). +

+

+The Perl-compatible lookaround assertions are atomic. If an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic assertions can be +useful. PCRE2 has some support for these, described in the section entitled +"Non-atomic assertions" +below, but they are not Perl-compatible. +

+

+A lookaround assertion may appear as the condition in a +conditional group +(see below). In this case, the result of matching the assertion determines +which branch of the condition is followed. +

+

+Assertion groups are not capture groups. If an assertion contains capture +groups within it, these are counted for the purposes of numbering the capture +groups in the whole pattern. Within each branch of an assertion, locally +captured substrings may be referenced in the usual way. For example, a sequence +such as (.)\g{-1} can be used to check that two adjacent characters are the +same. +

+

+When a branch within an assertion fails to match, any substrings that were +captured are discarded (as happens with any pattern branch that fails to +match). A negative assertion is true only when all its branches fail to match; +this means that no captured substrings are ever retained after a successful +negative assertion. When an assertion contains a matching branch, what happens +depends on the type of assertion. +

+

+For a positive assertion, internally captured substrings in the successful +branch are retained, and matching continues with the next pattern item after +the assertion. For a negative assertion, a matching branch means that the +assertion is not true. If such an assertion is being used as a condition in a +conditional group +(see below), captured substrings are retained, because matching continues with +the "no" branch of the condition. For other failing negative assertions, +control passes to the previous backtracking point, thus discarding any captured +strings within the assertion. +

+

+Most assertion groups may be repeated; though it makes no sense to assert the +same thing several times, the side effect of capturing in positive assertions +may occasionally be useful. However, an assertion that forms the condition for +a conditional group may not be quantified. PCRE2 used to restrict the +repetition of assertions, but from release 10.35 the only restriction is that +an unlimited maximum repetition is changed to be one more than the minimum. For +example, {3,} is treated as {3,4}. +

+
+Alphabetic assertion names +
+

+Traditionally, symbolic sequences such as (?= and (?<= have been used to +specify lookaround assertions. Perl 5.28 introduced some experimental +alphabetic alternatives which might be easier to remember. They all start with +(* instead of (? and must be written using lower case letters. PCRE2 supports +the following synonyms: +

+  (*positive_lookahead:  or (*pla: is the same as (?=
+  (*negative_lookahead:  or (*nla: is the same as (?!
+  (*positive_lookbehind: or (*plb: is the same as (?<=
+  (*negative_lookbehind: or (*nlb: is the same as (?<!
+
+For example, (*pla:foo) is the same assertion as (?=foo). In the following +sections, the various assertions are described using the original symbolic +forms. +

+
+Lookahead assertions +
+

+Lookahead assertions start with (?= for positive assertions and (?! for +negative assertions. For example, +

+  \w+(?=;)
+
+matches a word followed by a semicolon, but does not include the semicolon in +the match, and +
+  foo(?!bar)
+
+matches any occurrence of "foo" that is not followed by "bar". Note that the +apparently similar pattern +
+  (?!foo)bar
+
+does not find an occurrence of "bar" that is preceded by something other than +"foo"; it finds any occurrence of "bar" whatsoever, because the assertion +(?!foo) is always true when the next three characters are "bar". A +lookbehind assertion is needed to achieve the other effect. +

+

+If you want to force a matching failure at some point in a pattern, the most +convenient way to do it is with (?!) because an empty string always matches, so +an assertion that requires there not to be an empty string must always fail. +The backtracking control verb (*FAIL) or (*F) is a synonym for (?!). +

+
+Lookbehind assertions +
+

+Lookbehind assertions start with (?<= for positive assertions and (?<! for +negative assertions. For example, +

+  (?<!foo)bar
+
+does find an occurrence of "bar" that is not preceded by "foo". The contents of +a lookbehind assertion are restricted such that there must be a known maximum +to the lengths of all the strings it matches. There are two cases: +

+

+If every top-level alternative matches a fixed length, for example +

+  (?<=colour|color)
+
+there is a limit of 65535 characters to the lengths, which do not have to be +the same, as this example demonstrates. This is the only kind of lookbehind +supported by PCRE2 versions earlier than 10.43 and by the alternative matching +function pcre2_dfa_match(). +

+

+In PCRE2 10.43 and later, pcre2_match() supports lookbehind assertions in +which one or more top-level alternatives can match more than one string length, +for example +

+  (?<=colou?r)
+
+The maximum matching length for any branch of the lookbehind is limited to a +value set by the calling program (default 255 characters). Unlimited repetition +(for example \d*) is not supported. In some cases, the escape sequence \K +(see above) +can be used instead of a lookbehind assertion at the start of a pattern to get +round the length limit restriction. +

+

+In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which matches a +single code unit even in a UTF mode) to appear in lookbehind assertions, +because it makes it impossible to calculate the length of the lookbehind. The +\X and \R escapes, which can match different numbers of code units, are never +permitted in lookbehinds. +

+

+"Subroutine" +calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long +as the called capture group matches a limited-length string. However, +recursion, +that is, a "subroutine" call into a group that is already active, +is not supported. +

+

+PCRE2 supports backreferences in lookbehinds, but only if certain conditions +are met. The PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no +use of (?| in the pattern (it creates duplicate group numbers), and if the +backreference is by name, the name must be unique. Of course, the referenced +group must itself match a limited length substring. The following pattern +matches words containing at least two characters that begin and end with the +same character: +

+   \b(\w)\w++(?<=\1)
+
+

+

+Possessive quantifiers can be used in conjunction with lookbehind assertions to +specify efficient matching at the end of subject strings. Consider a simple +pattern such as +

+  abcd$
+
+when applied to a long string that does not match. Because matching proceeds +from left to right, PCRE2 will look for each "a" in the subject and then see if +what follows matches the rest of the pattern. If the pattern is specified as +
+  ^.*abcd$
+
+the initial .* matches the entire string at first, but when this fails (because +there is no following "a"), it backtracks to match all but the last character, +then all but the last two characters, and so on. Once again the search for "a" +covers the entire string, from right to left, so we are no better off. However, +if the pattern is written as +
+  ^.*+(?<=abcd)
+
+there can be no backtracking for the .*+ item because of the possessive +quantifier; it can match only the entire string. The subsequent lookbehind +assertion does a single test on the last four characters. If it fails, the +match fails immediately. For long strings, this approach makes a significant +difference to the processing time. +

+
+Using multiple assertions +
+

+Several assertions (of any sort) may occur in succession. For example, +

+  (?<=\d{3})(?<!999)foo
+
+matches "foo" preceded by three digits that are not "999". Notice that each of +the assertions is applied independently at the same point in the subject +string. First there is a check that the previous three characters are all +digits, and then there is a check that the same three characters are not "999". +This pattern does not match "foo" preceded by six characters, the first three +of which are digits and the last three of which are not "999". For example, it +doesn't match "123abcfoo". A pattern to do that is +
+  (?<=\d{3}...)(?<!999)foo
+
+This time the first assertion looks at the preceding six characters, checking +that the first three are digits, and then the second assertion checks that the +preceding three characters are not "999". +

+

+Assertions can be nested in any combination. For example, +

+  (?<=(?<!foo)bar)baz
+
+matches an occurrence of "baz" that is preceded by "bar" which in turn is not +preceded by "foo", while +
+  (?<=\d{3}(?!999)...)foo
+
+is another pattern that matches "foo" preceded by three digits and any three +characters that are not "999". +

+
NON-ATOMIC ASSERTIONS
+

+Traditional lookaround assertions are atomic. That is, if an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic positive assertions +can be useful. PCRE2 provides these using the following syntax: +

+  (*non_atomic_positive_lookahead:  or (*napla: or (?*
+  (*non_atomic_positive_lookbehind: or (*naplb: or (?<*
+
+Consider the problem of finding the right-most word in a string that also +appears earlier in the string, that is, it must appear at least twice in total. +This pattern returns the required result as captured substring 1: +
+  ^(?x)(*napla: .* \b(\w++)) (?> .*? \b\1\b ){2}
+
+For a subject such as "word1 word2 word3 word2 word3 word4" the result is +"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the +"x" option, which causes white space (introduced for readability) to be +ignored. Inside the assertion, the greedy .* at first consumes the entire +string, but then has to backtrack until the rest of the assertion can match a +word, which is captured by group 1. In other words, when the assertion first +succeeds, it captures the right-most word in the string. +

+

+The current matching point is then reset to the start of the subject, and the +rest of the pattern match checks for two occurrences of the captured word, +using an ungreedy .*? to scan from the left. If this succeeds, we are done, but +if the last word in the string does not occur twice, this part of the pattern +fails. If a traditional atomic lookahead (?= or (*pla: had been used, the +assertion could not be re-entered, and the whole match would fail. The pattern +would succeed only if the very last word in the subject was found twice. +

+

+Using a non-atomic lookahead, however, means that when the last word does not +occur twice in the string, the lookahead can backtrack and find the second-last +word, and so on, until either the match succeeds, or all words have been +tested. +

+

+Two conditions must be met for a non-atomic assertion to be useful: the +contents of one or more capturing groups must change after a backtrack into the +assertion, and there must be a backreference to a changed group later in the +pattern. If this is not the case, the rest of the pattern match fails exactly +as before because nothing has changed, so using a non-atomic assertion just +wastes resources. +

+

+There is one exception to backtracking into a non-atomic assertion. If an +(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That +is, a subsequent match failure cannot backtrack into the assertion. +

+

+Non-atomic assertions are not supported by the alternative matching function +pcre2_dfa_match(). They are supported by JIT, but only if they do not +contain any control verbs such as (*ACCEPT). (This may change in future). Note +that assertions that appear as conditions for +conditional groups +(see below) must be atomic. +

+
SCAN SUBSTRING ASSERTIONS
+

+A special kind of assertion, not compatible with Perl, makes it possible to +check the contents of a captured substring by matching it with a subpattern. +Because this involves capturing, this feature is not supported by +pcre2_dfa_match(). +

+

+A scan substring assertion starts with the sequence (*scan_substring: or +(*scs: which is followed by a list of substring numbers (absolute or relative) +and/or substring names enclosed in single quotes or angle brackets, all within +parentheses. The rest of the item is the subpattern that is applied to the +substring, as shown in these examples: +

+  (*scan_substring:(1)...)
+  (*scs:(-2)...)
+  (*scs:('AB')...)
+  (*scs:(1,'AB',-2)...)
+
+The list of groups is checked in the order they are given, and it is the +contents of the first one that is found to be set that are scanned. When +PCRE2_DUPNAMES is set and there are ambiguous group names, all groups with the +same name are checked in numerical order. A scan substring assertion fails if +none of the groups it references have been set. +

+

+The pattern match on the substring is always anchored, that is, it must match +from the start of the substring. There is no "bumpalong" if it does not match +at the start. The end of the subject is temporarily reset to be the end of the +substring, so \Z, \z, and $ will match there. However, the start of the +subject is not reset. This means that ^ matches only if the substring is +actually at the start of the main subject, but it also means that lookbehind +assertions into what precedes the substring are possible. +

+

+Here is a very simple example: find a word that contains the rare (in English) +sequence of letters "rh" not at the start: +

+  \b(\w++)(*scs:(1).+rh)
+
+The first group captures a word which is then scanned by the second group. +This example does not actually need this heavyweight feature; the same match +can be achieved with: +
+  \b\w+?rh\w*\b
+
+When things are more complicated, however, scanning a captured substring can be +a useful way to describe the required match. For example, there is a rather +complicated pattern in the PCRE2 test data that checks an entire subject string +for a palindrome, that is, the sequence of letters is the same in both +directions. Suppose you want to search for individual words of two or more +characters such as "level" that are palindromes: +
+  (\b\w{2,}+\b)(*scs:(1)...palindrome-matching-pattern...)
+
+Within a substring scanning subpattern, references to other groups work as +normal. Capturing groups may appear, and will retain their values during +ongoing matching if the assertion succeeds. +

+
SCRIPT RUNS
+

+In concept, a script run is a sequence of characters that are all from the same +Unicode script such as Latin or Greek. However, because some scripts are +commonly used together, and because some diacritical and other marks are used +with multiple scripts, it is not that simple. There is a full description of +the rules that PCRE2 uses in the section entitled +"Script Runs" +in the +pcre2unicode +documentation. +

+

+If part of a pattern is enclosed between (*script_run: or (*sr: and a closing +parenthesis, it fails if the sequence of characters that it matches is not a +script run. After a failure, normal backtracking occurs. Script runs can be +used to detect spoofing attacks using characters that look the same, but are +from different scripts. The string "paypal.com" is an infamous example, where +the letters could be a mixture of Latin and Cyrillic. This pattern ensures that +the matched characters in a sequence of non-spaces that follow white space are +a script run: +

+  \s+(*sr:\S+)
+
+To be sure that they are all from the Latin script (for example), a lookahead +can be used: +
+  \s+(?=\p{Latin})(*sr:\S+)
+
+This works as long as the first character is expected to be a character in that +script, and not (for example) punctuation, which is allowed with any script. If +this is not the case, a more creative lookahead is needed. For example, if +digits, underscore, and dots are permitted at the start: +
+  \s+(?=[0-9_.]*\p{Latin})(*sr:\S+)
+
+
+

+

+In many cases, backtracking into a script run pattern fragment is not +desirable. The script run can employ an atomic group to prevent this. Because +this is a common requirement, a shorthand notation is provided by +(*atomic_script_run: or (*asr: +

+  (*asr:...) is the same as (*sr:(?>...))
+
+Note that the atomic group is inside the script run. Putting it outside would +not prevent backtracking into the script run pattern. +

+

+Support for script runs is not available if PCRE2 is compiled without Unicode +support. A compile-time error is given if any of the above constructs is +encountered. Script runs are not supported by the alternate matching function, +pcre2_dfa_match() because they use the same mechanism as capturing +parentheses. +

+

+Warning: The (*ACCEPT) control verb +(see below) +should not be used within a script run group, because it causes an immediate +exit from the group, bypassing the script run checking. +

+
CONDITIONAL GROUPS
+

+It is possible to cause the matching process to obey a pattern fragment +conditionally or to choose between two alternative fragments, depending on +the result of an assertion, or whether a specific capture group has +already been matched. The two possible forms of conditional group are: +

+  (?(condition)yes-pattern)
+  (?(condition)yes-pattern|no-pattern)
+
+If the condition is satisfied, the yes-pattern is used; otherwise the +no-pattern (if present) is used. An absent no-pattern is equivalent to an empty +string (it always matches). If there are more than two alternatives in the +group, a compile-time error occurs. Each of the two alternatives may itself +contain nested groups of any form, including conditional groups; the +restriction to two alternatives applies only at the level of the condition +itself. This pattern fragment is an example where the alternatives are complex: +
+  (?(1) (A|B|C) | (D | (?(2)E|F) | E) )
+
+
+

+

+There are five kinds of condition: references to capture groups, references to +recursion, two pseudo-conditions called DEFINE and VERSION, and assertions. +

+
+Checking for a used capture group by number +
+

+If the text between the parentheses consists of a sequence of digits, the +condition is true if a capture group of that number has previously matched. If +there is more than one capture group with the same number (see the earlier +section about duplicate group numbers), +the condition is true if any of them have matched. An alternative notation, +which is a PCRE2 extension, not supported by Perl, is to precede the digits +with a plus or minus sign. In this case, the group number is relative rather +than absolute. The most recently opened capture group (which could be enclosing +this condition) can be referenced by (?(-1), the next most recent by (?(-2), +and so on. Inside loops it can also make sense to refer to subsequent groups. +The next capture group to be opened can be referenced as (?(+1), and so on. The +value zero in any of these forms is not used; it provokes a compile-time error. +

+

+Consider the following pattern, which contains non-significant white space to +make it more readable (assume the PCRE2_EXTENDED option) and to divide it into +three parts for ease of discussion: +

+  ( \( )?    [^()]+    (?(1) \) )
+
+The first part matches an optional opening parenthesis, and if that +character is present, sets it as the first captured substring. The second part +matches one or more characters that are not parentheses. The third part is a +conditional group that tests whether or not the first capture group +matched. If it did, that is, if the subject started with an opening parenthesis, +the condition is true, and so the yes-pattern is executed and a closing +parenthesis is required. Otherwise, since no-pattern is not present, the +conditional group matches nothing. In other words, this pattern matches a +sequence of non-parentheses, optionally enclosed in parentheses. +

+

+If you were embedding this pattern in a larger one, you could use a relative +reference: +

+  ...other stuff... ( \( )?    [^()]+    (?(-1) \) ) ...
+
+This makes the fragment independent of the parentheses in the larger pattern. +

+
+Checking for a used capture group by name +
+

+Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used +capture group by name. For compatibility with earlier versions of PCRE1, which +had this facility before Perl, the syntax (?(name)...) is also recognized. +Note, however, that undelimited names consisting of the letter R followed by +digits are ambiguous (see the following section). Rewriting the above example +to use a named group gives this: +

+  (?<OPEN> \( )?    [^()]+    (?(<OPEN>) \) )
+
+If the name used in a condition of this kind is a duplicate, the test is +applied to all groups of the same name, and is true if any one of them has +matched. +

+
+Checking for pattern recursion +
+

+"Recursion" in this sense refers to any subroutine-like call from one part of +the pattern to another, whether or not it is actually recursive. See the +sections entitled +"Recursive patterns" +and +"Groups as subroutines" +below for details of recursion and subroutine calls. +

+

+If a condition is the string (R), and there is no capture group with the name +R, the condition is true if matching is currently in a recursion or subroutine +call to the whole pattern or any capture group. If digits follow the letter R, +and there is no group with that name, the condition is true if the most recent +call is into a group with the given number, which must exist somewhere in the +overall pattern. This is a contrived example that is equivalent to a+b: +

+  ((?(R1)a+|(?1)b))
+
+However, in both cases, if there is a capture group with a matching name, the +condition tests for its being set, as described in the section above, instead +of testing for recursion. For example, creating a group with the name R1 by +adding (?<R1>) to the above pattern completely changes its meaning. +

+

+If a name preceded by ampersand follows the letter R, for example: +

+  (?(R&name)...)
+
+the condition is true if the most recent recursion is into a group of that name +(which must exist within the pattern). +

+

+This condition does not check the entire recursion stack. It tests only the +current level. If the name used in a condition of this kind is a duplicate, the +test is applied to all groups of the same name, and is true if any one of +them is the most recent recursion. +

+

+At "top level", all these recursion test conditions are false. +

+
+Defining capture groups for use by reference only +
+

+If the condition is the string (DEFINE), the condition is always false, even if +there is a group with the name DEFINE. In this case, there may be only one +alternative in the rest of the conditional group. It is always skipped if +control reaches this point in the pattern; the idea of DEFINE is that it can be +used to define subroutines that can be referenced from elsewhere. (The use of +subroutines +is described below.) For example, a pattern to match an IPv4 address such as +"192.168.23.245" could be written like this (ignore white space and line +breaks): +

+  (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
+  \b (?&byte) (\.(?&byte)){3} \b
+
+The first part of the pattern is a DEFINE group inside which another group +named "byte" is defined. This matches an individual component of an IPv4 +address (a number less than 256). When matching takes place, this part of the +pattern is skipped because DEFINE acts like a false condition. The rest of the +pattern uses references to the named group to match the four dot-separated +components of an IPv4 address, insisting on a word boundary at each end. +

+
+Checking the PCRE2 version +
+

+Programs that link with a PCRE2 library can check the version by calling +pcre2_config() with appropriate arguments. Users of applications that do +not have access to the underlying code cannot do this. A special "condition" +called VERSION exists to allow such users to discover which version of PCRE2 +they are dealing with by using this condition to match a string such as +"yesno". VERSION must be followed either by "=" or ">=" and a version number. +For example: +

+  (?(VERSION>=10.4)yes|no)
+
+This pattern matches "yes" if the PCRE2 version is greater than or equal to 10.4, or +"no" otherwise. The fractional part of the version number may not contain more +than two digits. +

+
+Assertion conditions +
+

+If the condition is not in any of the above formats, it must be a parenthesized +assertion. This may be a positive or negative lookahead or lookbehind +assertion. However, it must be a traditional atomic assertion, not one of the +non-atomic assertions. +

+

+Consider this pattern, again containing non-significant white space, and with +the two alternatives on the second line: +

+  (?(?=[^a-z]*[a-z])
+  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )
+
+The condition is a positive lookahead assertion that matches an optional +sequence of non-letters followed by a letter. In other words, it tests for the +presence of at least one letter in the subject. If a letter is found, the +subject is matched against the first alternative; otherwise it is matched +against the second. This pattern matches strings in one of the two forms +dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. +

+

+When an assertion that is a condition contains capture groups, any +capturing that occurs in a matching branch is retained afterwards, for both +positive and negative assertions, because matching always continues after the +assertion, whether it succeeds or fails. (Compare non-conditional assertions, +for which captures are retained only for positive assertions that succeed.) +

+
COMMENTS
+

+There are two ways of including comments in patterns that are processed by +PCRE2. In both cases, the start of the comment must not be in a character +class, nor in the middle of any other sequence of related characters such as +(?: or a group name or number or a Unicode property name. The characters that +make up a comment play no part in the pattern matching. +

+

+The sequence (?# marks the start of a comment that continues up to the next +closing parenthesis. Nested parentheses are not permitted. If the +PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # character +also introduces a comment, which in this case continues to immediately after +the next newline character or character sequence in the pattern. Which +characters are interpreted as newlines is controlled by an option passed to the +compiling function or by a special sequence at the start of the pattern, as +described in the section entitled +"Newline conventions" +above. Note that the end of this type of comment is a literal newline sequence +in the pattern; escape sequences that happen to represent a newline do not +count. For example, consider this pattern when PCRE2_EXTENDED is set, and the +default newline convention (a single linefeed character) is in force: +

+  abc #comment \n still comment
+
+On encountering the # character, pcre2_compile() skips along, looking for +a newline in the pattern. The sequence \n is still literal at this stage, so +it does not terminate the comment. Only an actual character with the code value +0x0a (the default newline) does so. +

+
RECURSIVE PATTERNS
+

+Consider the problem of matching a string in parentheses, allowing for +unlimited nested parentheses. Without the use of recursion, the best that can +be done is to use a pattern that matches up to some fixed depth of nesting. It +is not possible to handle an arbitrary nesting depth. +

+

+For some time, Perl has provided a facility that allows regular expressions to +recurse (amongst other things). It does this by interpolating Perl code in the +expression at run time, and the code can refer to the expression itself. A Perl +pattern using code interpolation to solve the parentheses problem can be +created like this: +

+  $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x;
+
+The (?p{...}) item interpolates Perl code at run time, and in this case refers +recursively to the pattern in which it appears. +

+

+Obviously, PCRE2 cannot support the interpolation of Perl code. Instead, it +supports special syntax for recursion of the entire pattern, and also for +individual capture group recursion. After its introduction in PCRE1 and Python, +this kind of recursion was subsequently introduced into Perl at release 5.10. +

+

+A special item that consists of (? followed by a number greater than zero and a +closing parenthesis is a recursive subroutine call of the capture group of the +given number, provided that it occurs inside that group. (If not, it is a +non-recursive subroutine +call, which is described in the next section.) The special item (?R) or (?0) is +a recursive call of the entire regular expression. +

+

+This PCRE2 pattern solves the nested parentheses problem (assume the +PCRE2_EXTENDED option is set so that white space is ignored): +

+  \( ( [^()]++ | (?R) )* \)
+
+First it matches an opening parenthesis. Then it matches any number of +substrings which can either be a sequence of non-parentheses, or a recursive +match of the pattern itself (that is, a correctly parenthesized substring). +Finally there is a closing parenthesis. Note the use of a possessive quantifier +to avoid backtracking into sequences of non-parentheses. +

+

+If this were part of a larger pattern, you would not want to recurse the entire +pattern, so instead you could use this: +

+  ( \( ( [^()]++ | (?1) )* \) )
+
+We have put the pattern into parentheses, and caused the recursion to refer to +them instead of the whole pattern. +

+

+In a larger pattern, keeping track of parenthesis numbers can be tricky. This +is made easier by the use of relative references. Instead of (?1) in the +pattern above you can write (?-2) to refer to the second most recently opened +parentheses preceding the recursion. In other words, a negative number counts +capturing parentheses leftwards from the point at which it is encountered. +

+

+Be aware, however, that if +duplicate capture group numbers +are in use, relative references refer to the earliest group with the +appropriate number. Consider, for example: +

+  (?|(a)|(b)) (c) (?-2)
+
+The first two capture groups (a) and (b) are both numbered 1, and group (c) +is number 2. When the reference (?-2) is encountered, the second most recently +opened parentheses has the number 1, but it is the first such group (the (a) +group) to which the recursion refers. This would be the same if an absolute +reference (?1) was used. In other words, relative references are just a +shorthand for computing a group number. +

+

+It is also possible to refer to subsequent capture groups, by writing +references such as (?+2). However, these cannot be recursive because the +reference is not inside the parentheses that are referenced. They are always +non-recursive subroutine +calls, as described in the next section. +

+

+An alternative approach is to use named parentheses. The Perl syntax for this +is (?&name); PCRE1's earlier syntax (?P>name) is also supported. We could +rewrite the above example as follows: +

+  (?<pn> \( ( [^()]++ | (?&pn) )* \) )
+
+If there is more than one group with the same name, the earliest one is +used. +

+

+The example pattern that we have been looking at contains nested unlimited +repeats, and so the use of a possessive quantifier for matching strings of +non-parentheses is important when applying the pattern to strings that do not +match. For example, when this pattern is applied to +

+  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+
+it yields "no match" quickly. However, if a possessive quantifier is not used, +the match runs for a very long time indeed because there are so many different +ways the + and * repeats can carve up the subject, and all have to be tested +before failure can be reported. +

+

+At the end of a match, the values of capturing parentheses are those from +the outermost level. If you want to obtain intermediate values, a callout +function can be used (see below and the +pcre2callout +documentation). If the pattern above is matched against +

+  (ab(cd)ef)
+
+the value for the inner capturing parentheses (numbered 2) is "ef", which is +the last value taken on at the top level. If a capture group is not matched at +the top level, its final captured value is unset, even if it was (temporarily) +set at a deeper level during the matching process. +

+

+Do not confuse the (?R) item with the condition (R), which tests for recursion. +Consider this pattern, which matches text in angle brackets, allowing for +arbitrary nesting. Only digits are allowed in nested brackets (that is, when +recursing), whereas any characters are permitted at the outer level. +

+  < (?: (?(R) \d++  | [^<>]*+) | (?R)) * >
+
+In this pattern, (?(R) is the start of a conditional group, with two different +alternatives for the recursive and non-recursive cases. The (?R) item is the +actual recursive call. +

+
+Differences in recursion processing between PCRE2 and Perl +
+

+Some former differences between PCRE2 and Perl no longer exist. +

+

+Before release 10.30, recursion processing in PCRE2 differed from Perl in that +a recursive subroutine call was always treated as an atomic group. That is, +once it had matched some of the subject string, it was never re-entered, even +if it contained untried alternatives and there was a subsequent matching +failure. (Historical note: PCRE implemented recursion before Perl did.) +

+

+Starting with release 10.30, recursive subroutine calls are no longer treated +as atomic. That is, they can be re-entered to try unused alternatives if there +is a matching failure later in the pattern. This is now compatible with the way +Perl works. If you want a subroutine call to be atomic, you must explicitly +enclose it in an atomic group. +

+

+Supporting backtracking into recursions simplifies certain types of recursive +pattern. For example, this pattern matches palindromic strings: +

+  ^((.)(?1)\2|.?)$
+
+The second branch in the group matches a single central character in the +palindrome when there are an odd number of characters, or nothing when there +are an even number of characters, but in order to work it has to be able to try +the second case when the rest of the pattern match fails. If you want to match +typical palindromic phrases, the pattern has to ignore all non-word characters, +which can be done like this: +
+  ^\W*+((.)\W*+(?1)\W*+\2|\W*+.?)\W*+$
+
+If run with the PCRE2_CASELESS option, this pattern matches phrases such as "A +man, a plan, a canal: Panama!". Note the use of the possessive quantifier *+ to +avoid backtracking into sequences of non-word characters. Without this, PCRE2 +takes a great deal longer (ten times or more) to match typical phrases, and +Perl takes so long that you think it has gone into a loop. +

+

+Another way in which PCRE2 and Perl used to differ in their recursion +processing is in the handling of captured values. Formerly in Perl, when a +group was called recursively or as a subroutine (see the next section), it +had no access to any values that were captured outside the recursion, whereas +in PCRE2 these values can be referenced. Consider this pattern: +

+  ^(.)(\1|a(?2))
+
+This pattern matches "bab". The first capturing parentheses match "b", then in +the second group, when the backreference \1 fails to match "b", the second +alternative matches "a" and then recurses. In the recursion, \1 does now match +"b" and so the whole match succeeds. This match used to fail in Perl, but in +later versions (I tried 5.024) it now works. +

+
GROUPS AS SUBROUTINES
+

+If the syntax for a recursive group call (either by number or by name) is used +outside the parentheses to which it refers, it operates a bit like a subroutine +in a programming language. More accurately, PCRE2 treats the referenced group +as an independent subpattern which it tries to match at the current matching +position. The called group may be defined before or after the reference. A +numbered reference can be absolute or relative, as in these examples: +

+  (...(absolute)...)...(?2)...
+  (...(relative)...)...(?-1)...
+  (...(?+1)...(relative)...
+
+An earlier example pointed out that the pattern +
+  (sens|respons)e and \1ibility
+
+matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If instead the pattern +
+  (sens|respons)e and (?1)ibility
+
+is used, it does match "sense and responsibility" as well as the other two +strings. Another example is given in the discussion of DEFINE above. +

+

+Like recursions, subroutine calls used to be treated as atomic, but this +changed at PCRE2 release 10.30, so backtracking into subroutine calls can now +occur. However, any capturing parentheses that are set during the subroutine +call revert to their previous values afterwards. +

+

+Processing options such as case-independence are fixed when a group is +defined, so if it is used as a subroutine, such options cannot be changed for +different calls. For example, consider this pattern: +

+  (abc)(?i:(?-1))
+
+It matches "abcabc". It does not match "abcABC" because the change of +processing option does not affect the called group. +

+

+The behaviour of +backtracking control verbs +in groups when called as subroutines is described in the section entitled +"Backtracking verbs in subroutines" +below. +

+
ONIGURUMA SUBROUTINE SYNTAX
+

+For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for calling a group as a subroutine, possibly recursively. Here are two +of the examples used above, rewritten using this syntax: +

+  (?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
+  (sens|respons)e and \g'1'ibility
+
+PCRE2 supports an extension to Oniguruma: if a number is preceded by a +plus or a minus sign it is taken as a relative reference. For example: +
+  (abc)(?i:\g<-1>)
+
+Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not +synonymous. The former is a backreference; the latter is a subroutine call. +

+
CALLOUTS
+

+Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl +code to be obeyed in the middle of matching a regular expression. This makes it +possible, amongst other things, to extract different substrings that match the +same pair of parentheses when there is a repetition. +

+

+PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl +code. The feature is called "callout". The caller of PCRE2 provides an external +function by putting its entry point in a match context using the function +pcre2_set_callout(), and then passing that context to pcre2_match() +or pcre2_dfa_match(). If no match context is passed, or if the callout +entry point is set to NULL, callout points will be passed over silently during +matching. To disallow callouts in the pattern syntax, you may use the +PCRE2_EXTRA_NEVER_CALLOUT option. +

+

+Within a regular expression, (?C<arg>) indicates a point at which the external +function is to be called. There are two kinds of callout: those with a +numerical argument and those with a string argument. (?C) on its own with no +argument is treated as (?C0). A numerical argument allows the application to +distinguish between different callouts. String arguments were added for release +10.20 to make it possible for script languages that use PCRE2 to embed short +scripts within patterns in a similar way to Perl. +

+

+During matching, when PCRE2 reaches a callout point, the external function is +called. It is provided with the number or string argument of the callout, the +position in the pattern, and one item of data that is also set in the match +block. The callout function may cause matching to proceed, to backtrack, or to +fail. +

+

+By default, PCRE2 implements a number of optimizations at matching time, and +one side-effect is that sometimes callouts are skipped. If you need all +possible callouts to happen, you need to set options that disable the relevant +optimizations. More details, including a complete description of the +programming interface to the callout function, are given in the +pcre2callout +documentation. +

+
+Callouts with numerical arguments +
+

+If you just want to have a means of identifying different callout points, put a +number less than 256 after the letter C. For example, this pattern has two +callout points: +

+  (?C1)abc(?C2)def
+
+If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical +callouts are automatically installed before each item in the pattern. They are +all numbered 255. If there is a conditional group in the pattern whose +condition is an assertion, an additional callout is inserted just before the +condition. An explicit callout may also be set at this position, as in this +example: +
+  (?(?C9)(?=a)abc|def)
+
+Note that this applies only to assertion conditions, not to other types of +condition. +

+
+Callouts with string arguments +
+

+A delimited string may be used instead of a number as a callout argument. The +starting delimiter must be one of ` ' " ^ % # $ { and the ending delimiter is +the same as the start, except for {, where the ending delimiter is }. If the +ending delimiter is needed within the string, it must be doubled. For +example: +

+  (?C'ab ''c'' d')xyz(?C{any text})pqr
+
+The doubling is removed before the string is passed to the callout function. +

+
BACKTRACKING CONTROL
+

+There are a number of special "Backtracking Control Verbs" (to use Perl's +terminology) that modify the behaviour of backtracking during matching. They +are generally of the form (*VERB) or (*VERB:NAME). Some verbs take either form, +and may behave differently depending on whether or not a name argument is +present. The names are not required to be unique within the pattern. +

+

+By default, for compatibility with Perl, a name is any sequence of characters +that does not include a closing parenthesis. The name is not processed in +any way, and it is not possible to include a closing parenthesis in the name. +This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result +is no longer Perl-compatible. +

+

+When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names +and only an unescaped closing parenthesis terminates the name. However, the +only backslash items that are permitted are \Q, \E, and sequences such as +\x{100} that define character code points. Character type escapes such as \d +are faulted. +

+

+A closing parenthesis can be included in a name either as \) or between \Q +and \E. In addition to backslash processing, if the PCRE2_EXTENDED or +PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb names is +skipped, and #-comments are recognized, exactly as in the rest of the pattern. +PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect verb names unless +PCRE2_ALT_VERBNAMES is also set. +

+

+The maximum length of a name is 255 in the 8-bit library and 65535 in the +16-bit and 32-bit libraries. If the name is empty, that is, if the closing +parenthesis immediately follows the colon, the effect is as if the colon were +not there. Any number of these verbs may occur in a pattern. Except for +(*ACCEPT), they may not be quantified. +

+

+Since these verbs are specifically related to backtracking, most of them can be +used only when the pattern is to be matched using the traditional matching +function or JIT, because they use backtracking algorithms. With the exception +of (*FAIL), which behaves like a failing negative assertion, the backtracking +control verbs cause an error if encountered by the DFA matching function. +

+

+The behaviour of these verbs in +repeated groups, +assertions, +and in +capture groups called as subroutines +(whether or not recursively) is documented below. +

+
+Optimizations that affect backtracking verbs +
+

+PCRE2 contains some optimizations that are used to speed up matching by running +some checks at the start of each match attempt. For example, it may know the +minimum length of matching subject, or that a particular character must be +present. When one of these optimizations bypasses the running of a match, any +included backtracking verbs will not, of course, be processed. You can suppress +the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option +when calling pcre2_compile(), by calling pcre2_set_optimize() with a +PCRE2_START_OPTIMIZE_OFF directive, or by starting the pattern with +(*NO_START_OPT). There is more discussion of this option in the section +entitled +"Compiling a pattern" +in the +pcre2api +documentation. +

+

+Experiments with Perl suggest that it too has similar optimizations, and like +PCRE2, turning them off can change the result of a match. +

+
+Verbs that act immediately +
+

+The following verbs act as soon as they are encountered. +

+   (*ACCEPT) or (*ACCEPT:NAME)
+
+This verb causes the match to end successfully, skipping the remainder of the +pattern. However, when it is inside a capture group that is called as a +subroutine, only that group is ended successfully. Matching then continues +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the +assertion succeeds; in a negative assertion, the assertion fails. +

+

+If (*ACCEPT) is inside capturing parentheses, the data so far is captured. For +example: +

+  A((?:A|B(*ACCEPT)|C)D)
+
+This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by +the outer parentheses. +

+

+(*ACCEPT) is the only backtracking verb that is allowed to be quantified +because an ungreedy quantification with a minimum of zero acts only when a +backtrack happens. Consider, for example, +

+  (A(*ACCEPT)??B)C
+
+where A, B, and C may be complex expressions. After matching "A", the matcher +processes "BC"; if that fails, causing a backtrack, (*ACCEPT) is triggered and +the match succeeds. In both cases, all but C is captured. Whereas (*COMMIT) +(see below) means "fail on backtrack", a repeated (*ACCEPT) of this type means +"succeed on backtrack". +

+

+Warning: (*ACCEPT) should not be used within a script run group, because +it causes an immediate exit from the group, bypassing the script run checking. +

+  (*FAIL) or (*FAIL:NAME)
+
+This verb causes a matching failure, forcing backtracking to occur. It may be +abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl +documentation notes that it is probably useful only when combined with (?{}) or +(??{}). Those are, of course, Perl features that are not present in PCRE2. The +nearest equivalent is the callout feature, as for example in this pattern: +
+  a+(?C)(*FAIL)
+
+A match with the string "aaaa" always fails, but the callout is taken before +each backtrack happens (in this example, 10 times). +

+

+(*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is recorded just before +the verb acts. +

+
+Recording which path was taken +
+

+There is one verb whose main purpose is to track how a match was arrived at, +though it also has a secondary use in conjunction with advancing the match +starting point (see (*SKIP) below). +

+  (*MARK:NAME) or (*:NAME)
+
+A name is always required with this verb. For all the other backtracking +control verbs, a NAME argument is optional. +

+

+When a match succeeds, the last-encountered mark name on the +matching path is passed back to the caller as described in the section entitled +"Other information about the match" +in the +pcre2api +documentation. This applies to all instances of (*MARK) and other verbs, +including those inside assertions and atomic groups. However, there are +differences in those cases when (*MARK) is used in conjunction with (*SKIP) as +described below. +

+

+The mark name that was last encountered on the matching path is passed back. A +verb without a NAME argument is ignored for this purpose. Here is an example of +pcre2test output, where the "mark" modifier requests the retrieval and +outputting of (*MARK) data: +

+    re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
+  data> XY
+   0: XY
+  MK: A
+  XZ
+   0: XZ
+  MK: B
+
+The (*MARK) name is tagged with "MK:" in this output, and in this example it +indicates which of the two alternatives matched. This is a more efficient way +of obtaining this information than putting each alternative in its own +capturing parentheses. +

+

+If a verb with a name is encountered in a positive assertion that is true, the +name is recorded and passed back if it is the last-encountered. This does not +happen for negative assertions or failing positive assertions. +

+

+After a partial match or a failed match, the last encountered name in the +entire match process is returned. For example: +

+    re> /X(*MARK:A)Y|X(*MARK:B)Z/mark
+  data> XP
+  No match, mark = B
+
+Note that in this unanchored example the mark is retained from the match +attempt that started at the letter "X" in the subject. Subsequent match +attempts starting at "P" and then with an empty string do not get as far as the +(*MARK) item, but nevertheless do not reset it. +

+

+If you are interested in (*MARK) values after failed matches, you should +probably either set the PCRE2_NO_START_OPTIMIZE option or call +pcre2_set_optimize() with a PCRE2_START_OPTIMIZE_OFF directive +(see above) +to ensure that the match is always attempted. +

+
+Verbs that act after backtracking +
+

+The following verbs do nothing when they are encountered. Matching continues +with what follows, but if there is a subsequent match failure, causing a +backtrack to the verb, a failure is forced. That is, backtracking cannot pass +to the left of the verb. However, when one of these verbs appears inside an +atomic group or in an atomic lookaround assertion that is true, its effect is +confined to that group, because once the group has been matched, there is never +any backtracking into it. Backtracking from beyond an atomic assertion or group +ignores the entire group, and seeks a preceding backtracking point. +

+

+These verbs differ in exactly what kind of failure occurs when backtracking +reaches them. The behaviour described below is what happens when the verb is +not in a subroutine or an assertion. Subsequent sections cover these special +cases. +

+  (*COMMIT) or (*COMMIT:NAME)
+
+This verb causes the whole match to fail outright if there is a later matching +failure that causes backtracking to reach it. Even if the pattern is +unanchored, no further attempts to find a match by advancing the starting point +take place. If (*COMMIT) is the only backtracking verb that is encountered, +once it has been passed, pcre2_match() is committed to finding a match at +the current starting point, or not at all. For example: +
+  a+(*COMMIT)b
+
+This matches "xxaab" but not "aacaab". It can be thought of as a kind of +dynamic anchor, or "I've started, so I must finish." +

+

+The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names that are set with +(*MARK), ignoring those set by any of the other backtracking verbs. +

+

+If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +match does not always guarantee that a match must be at this starting point. +

+

+Note that (*COMMIT) at the start of a pattern is not the same as an anchor, +unless PCRE2's start-of-match optimizations are turned off, as shown in this +output from pcre2test: +

+    re> /(*COMMIT)abc/
+  data> xyzabc
+   0: abc
+  data>
+  re> /(*COMMIT)abc/no_start_optimize
+  data> xyzabc
+  No match
+
+For the first pattern, PCRE2 knows that any match must start with "a", so the +optimization skips along the subject to "a" before applying the pattern to the +first set of data. The match attempt then succeeds. The second pattern disables +the optimization that skips along to the first character. The pattern is now +applied starting at "x", and so the (*COMMIT) causes the match to fail without +trying any other starting points. +
+  (*PRUNE) or (*PRUNE:NAME)
+
+This verb causes the match to fail at the current starting position in the +subject if there is a later matching failure that causes backtracking to reach +it. If the pattern is unanchored, the normal "bumpalong" advance to the next +starting character then happens. Backtracking can occur as usual to the left of +(*PRUNE), before it is reached, or when matching to the right of (*PRUNE), but +if there is no match to the right, backtracking cannot cross (*PRUNE). In +simple cases, the use of (*PRUNE) is just an alternative to an atomic group or +possessive quantifier, but there are some uses of (*PRUNE) that cannot be +expressed in any other way. In an anchored pattern (*PRUNE) has the same effect +as (*COMMIT). +

+

+The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +

+  (*SKIP)
+
+This verb, when given without a name, is like (*PRUNE), except that if the +pattern is unanchored, the "bumpalong" advance is not to the next character, +but to the position in the subject where (*SKIP) was encountered. (*SKIP) +signifies that whatever text was matched leading up to it cannot be part of a +successful match if there is a later mismatch. Consider: +
+  a+(*SKIP)b
+
+If the subject is "aaaac...", after the first match attempt fails (starting at +the first character in the string), the starting point skips on to start the +next attempt at "c". Note that a possessive quantifier does not have the same +effect as this example; although it would suppress backtracking during the +first match attempt, the second attempt would start at the second character +instead of skipping on to "c". +

+

+If (*SKIP) is used to specify a new starting position that is the same as the +starting position of the current match, or (by being inside a lookbehind) +earlier, the position specified by (*SKIP) is ignored, and instead the normal +"bumpalong" occurs. +

+  (*SKIP:NAME)
+
+When (*SKIP) has an associated name, its behaviour is modified. When such a +(*SKIP) is triggered, the previous path through the pattern is searched for the +most recent (*MARK) that has the same name. If one is found, the "bumpalong" +advance is to the subject position that corresponds to that (*MARK) instead of +to where (*SKIP) was encountered. If no (*MARK) with a matching name is found, +the (*SKIP) is ignored. +

+

+The search for a (*MARK) name uses the normal backtracking mechanism, which +means that it does not see (*MARK) settings that are inside atomic groups or +assertions, because they are never re-entered by backtracking. Compare the +following pcre2test examples: +

+    re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/
+  data: abc
+   0: a
+   1: a
+  data:
+    re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/
+  data: abc
+   0: b
+   1: b
+
+In the first example, the (*MARK) setting is in an atomic group, so it is not +seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. This allows +the second branch of the pattern to be tried at the first character position. +In the second example, the (*MARK) setting is not in an atomic group. This +allows (*SKIP:X) to find the (*MARK) when it backtracks, and this causes a new +matching attempt to start at the second character. This time, the (*MARK) is +never seen because "a" does not match "b", so the matcher immediately jumps to +the second branch of the pattern. +

+

+Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +names that are set by other backtracking verbs. +

+  (*THEN) or (*THEN:NAME)
+
+This verb causes a skip to the next innermost alternative when backtracking +reaches it. That is, it cancels any further backtracking within the current +alternative. Its name comes from the observation that it can be used for a +pattern-based if-then-else block: +
+  ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
+
+If the COND1 pattern matches, FOO is tried (and possibly further items after +the end of the group if FOO succeeds); on failure, the matcher skips to the +second alternative and tries COND2, without backtracking into COND1. If that +succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no +more alternatives, so there is a backtrack to whatever came before the entire +group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). +

+

+The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +

+

+A group that does not contain a | character is just a part of the enclosing +alternative; it is not a nested alternation with only one alternative. The +effect of (*THEN) extends beyond such a group to the enclosing alternative. +Consider this pattern, where A, B, etc. are complex pattern fragments that do +not contain any | characters at this level: +

+  A (B(*THEN)C) | D
+
+If A and B are matched, but there is a failure in C, matching does not +backtrack into A; instead it moves to the next alternative, that is, D. +However, if the group containing (*THEN) is given an alternative, it +behaves differently: +
+  A (B(*THEN)C | (*FAIL)) | D
+
+The effect of (*THEN) is now confined to the inner group. After a failure in C, +matching moves to (*FAIL), which causes the whole group to fail because there +are no more alternatives to try. In this case, matching does backtrack into A. +

+

+Note that a conditional group is not considered as having two alternatives, +because only one is ever used. In other words, the | character in a conditional +group has a different meaning. Ignoring white space, consider: +

+  ^.*? (?(?=a) a | b(*THEN)c )
+
+If the subject is "ba", this pattern does not match. Because .*? is ungreedy, +it initially matches zero characters. The condition (?=a) then fails, the +character "b" is matched, but "c" is not. At this point, matching does not +backtrack to .*? as might perhaps be expected from the presence of the | +character. The conditional group is part of the single alternative that +comprises the whole pattern, and so the match fails. (If there was a backtrack +into .*?, allowing it to match "b", the match would succeed.) +

+

+The verbs just described provide four different "strengths" of control when +subsequent matching fails. (*THEN) is the weakest, carrying on the match at the +next alternative. (*PRUNE) comes next, failing the match at the current +starting position, but allowing an advance to the next character (for an +unanchored pattern). (*SKIP) is similar, except that the advance may be more +than one character. (*COMMIT) is the strongest, causing the entire match to +fail. +

+
+More than one backtracking verb +
+

+If more than one backtracking verb is present in a pattern, the one that is +backtracked onto first acts. For example, consider this pattern, where A, B, +etc. are complex pattern fragments: +

+  (A(*COMMIT)B(*THEN)C|ABD)
+
+If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +the next alternative (ABD) to be tried. This behaviour is consistent, but is +not always the same as Perl's. It means that if two or more backtracking verbs +appear in succession, all but the last of them have no effect. Consider this +example: +
+  ...(*COMMIT)(*PRUNE)...
+
+If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT). +

+
+Backtracking verbs in repeated groups +
+

+PCRE2 sometimes differs from Perl in its handling of backtracking verbs in +repeated groups. For example, consider: +

+  /(a(*COMMIT)b)+ac/
+
+If the subject is "abac", Perl matches unless its optimizations are disabled, +but PCRE2 always fails because the (*COMMIT) in the second repeat of the group +acts. +

+
+Backtracking verbs in assertions +
+

+(*FAIL) in any assertion has its normal effect: it forces an immediate +backtrack. The behaviour of the other backtracking verbs depends on whether +the assertion is standalone or acting as the condition in a conditional +group. +

+

+(*ACCEPT) in a standalone positive assertion causes the assertion to succeed +without any further processing; captured strings and a mark name (if set) are +retained. In a standalone negative assertion, (*ACCEPT) causes the assertion to +fail without any further processing; captured substrings and any mark name are +discarded. +

+

+If the assertion is a condition, (*ACCEPT) causes the condition to be true for +a positive assertion and false for a negative one; captured substrings are +retained in both cases. +

+

+The remaining verbs act only when a later failure causes a backtrack to +reach them. This means that, for the Perl-compatible assertions, their effect +is confined to the assertion, because Perl lookaround assertions are atomic. A +backtrack that occurs after such an assertion is complete does not jump back +into the assertion. Note in particular that a (*MARK) name that is set in an +assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern. +

+

+PCRE2 now supports non-atomic positive assertions and also "scan substring" +assertions, as described in the sections entitled +"Non-atomic assertions" +and +"Scan substring assertions" +above. These assertions must be standalone (not used as conditions). They are +not Perl-compatible. For these assertions, a later backtrack does jump back +into the assertion, and therefore verbs such as (*COMMIT) can be triggered by +backtracks from later in the pattern. +

+

+The effect of (*THEN) is not allowed to escape beyond an assertion. If there +are no more branches to try, (*THEN) causes a positive assertion to be false, +and a negative assertion to be true. This behaviour differs from Perl when the +assertion has only one branch. +

+

+The other backtracking verbs are not treated specially if they appear in a +standalone positive assertion. In a conditional positive assertion, +backtracking (from within the assertion) into (*COMMIT), (*SKIP), or (*PRUNE) +causes the condition to be false. However, for both standalone and conditional +negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes +the assertion to be true, without considering any further alternative branches. +

+
+Backtracking verbs in subroutines +
+

+These behaviours occur whether or not the group is called recursively. +

+

+(*ACCEPT) in a group called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the +subroutine call. Perl documents this behaviour. Perl's treatment of the other +verbs in subroutines is different in some cases. +

+

+(*FAIL) in a group called as a subroutine has its normal effect: it forces +an immediate backtrack. +

+

+(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when +triggered by being backtracked to in a group called as a subroutine. There is +then a backtrack at the outer level. +

+

+(*THEN), when triggered, skips to the next alternative in the innermost +enclosing group that has alternatives (its normal behaviour). However, if there +is no such group within the subroutine's group, the subroutine match fails and +there is a backtrack at the outer level. +

+
EBCDIC ENVIRONMENTS
+

+Differences in the way PCRE2 behaves when it is running in an EBCDIC environment +are covered in this section. +

+
+Escape sequences +
+

+When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. \a, \e, +\f, \n, \r, and \t generate the appropriate EBCDIC code values. The \c +escape is processed as specified for Perl in the perlebcdic document. The +only characters that are allowed after \c are A-Z, a-z, or one of @, [, \, ], +^, _, or ?. Any other character provokes a compile-time error. The sequence +\c@ encodes character code 0; after \c the letters (in either case) encode +characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 +(hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F). +

+

+Thus, apart from \c?, these escapes generate the same character code values as +they do in an ASCII or Unicode environment, though the meanings of the values +mostly differ. For example, \cG always generates code value 7, which is BEL in +ASCII but DEL in EBCDIC. +

+

+The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but +because 127 is not a control character in EBCDIC, Perl makes it generate the +APC character. Unfortunately, there are several variants of EBCDIC. In most of +them the APC character has the value 255 (hex FF), but in the one Perl calls +POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC +values, PCRE2 makes \c? generate 95; otherwise it generates 255. +

+
+Character classes +
+

+In character classes there is a special case in EBCDIC environments for ranges +whose end points are both specified as literal letters in the same case. For +compatibility with Perl, EBCDIC code points within the range that are not +letters are omitted. For example, [h-k] matches only four characters, even +though the EBCDIC codes for h and k are 0x88 and 0x92, a range of 11 code +points. However, if the range is specified numerically, for example, +[\x88-\x92] or [h-\x92], all code points are included. +

+
SEE ALSO
+

+pcre2api(3), pcre2callout(3), pcre2matching(3), +pcre2syntax(3), pcre2(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 27 November 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2perform.html b/3rd/pcre2/doc/html/pcre2perform.html new file mode 100644 index 00000000..b595119b --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2perform.html @@ -0,0 +1,280 @@ + + +pcre2perform specification + + +

pcre2perform man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 PERFORMANCE
+

+Two aspects of performance are discussed below: memory usage and processing +time. The way you express your pattern as a regular expression can affect both +of them. +

+
COMPILED PATTERN MEMORY USAGE
+

+Patterns are compiled by PCRE2 into a reasonably efficient interpretive code, +so that most simple patterns do not use much memory for storing the compiled +version. However, there is one case where the memory usage of a compiled +pattern can be unexpectedly large. If a parenthesized group has a quantifier +with a minimum greater than 1 and/or a limited maximum, the whole group is +repeated in the compiled code. For example, the pattern +

+  (abc|def){2,4}
+
+is compiled as if it were +
+  (abc|def)(abc|def)((abc|def)(abc|def)?)?
+
+(Technical aside: It is done this way so that backtrack points within each of +the repetitions can be independently maintained.) +

+

+For regular expressions whose quantifiers use only small numbers, this is not +usually a problem. However, if the numbers are large, and particularly if such +repetitions are nested, the memory usage can become an embarrassment. For +example, the very simple pattern +

+  ((ab){1,1000}c){1,3}
+
+uses over 50KiB when compiled using the 8-bit library. When PCRE2 is +compiled with its default internal pointer size of two bytes, the size limit on +a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and +this is reached with the above pattern if the outer repetition is increased +from 3 to 4. PCRE2 can be compiled to use larger internal pointers and thus +handle larger compiled patterns, but it is better to try to rewrite your +pattern to use less memory if you can. +

+

+One way of reducing the memory usage for such patterns is to make use of +PCRE2's +"subroutine" +facility. Re-writing the above pattern as +

+  ((ab)(?2){0,999}c)(?1){0,2}
+
+reduces the memory requirements to around 16KiB, and indeed it remains under +20KiB even with the outer repetition increased to 100. However, this kind of +pattern is not always exactly equivalent, because any captures within +subroutine calls are lost when the subroutine completes. If this is not a +problem, this kind of rewriting will allow you to process patterns that PCRE2 +cannot otherwise handle. The matching performance of the two different versions +of the pattern is roughly the same. (This applies from release 10.30 - things +were different in earlier releases.) +

+
STACK AND HEAP USAGE AT RUN TIME
+

+From release 10.30, the interpretive (non-JIT) version of pcre2_match() +uses very little system stack at run time. In earlier releases recursive +function calls could use a great deal of stack, and this could cause problems, +but this usage has been eliminated. Backtracking positions are now explicitly +remembered in memory frames controlled by the code. +

+

+The size of each frame depends on the size of pointer variables and the number +of capturing parenthesized groups in the pattern being matched. On a 64-bit +system the frame size for a pattern with no captures is 128 bytes. For each +capturing group the size increases by 16 bytes. +

+

+Until release 10.41, an initial 20KiB frames vector was allocated on the system +stack, but this still caused some issues for multi-thread applications where +each thread has a very small stack. From release 10.41 backtracking memory +frames are always held in heap memory. An initial heap allocation is obtained +the first time any match data block is passed to pcre2_match(). This is +remembered with the match data block and re-used if that block is used for +another match. It is freed when the match data block itself is freed. +

+

+The size of the initial block is the larger of 20KiB or ten times the pattern's +frame size, unless the heap limit is less than this, in which case the heap +limit is used. If the initial block proves to be too small during matching, it +is replaced by a larger block, subject to the heap limit. The heap limit is +checked only when a new block is to be allocated. Reducing the heap limit +between calls to pcre2_match() with the same match data block does not +affect the saved block. +

+

+In contrast to pcre2_match(), pcre2_dfa_match() does use recursive +function calls, but only for processing atomic groups, lookaround assertions, +and recursion within the pattern. The original version of the code used to +allocate quite large internal workspace vectors on the stack, which caused some +problems for some patterns in environments with small stacks. From release +10.32 the code for pcre2_dfa_match() has been re-factored to use heap +memory when necessary for internal workspace when recursing, though recursive +function calls are still used. +

+

+The "match depth" parameter can be used to limit the depth of function +recursion, and the "match heap" parameter to limit heap memory in +pcre2_dfa_match(). +

+
PROCESSING TIME
+

+Certain items in regular expression patterns are processed more efficiently +than others. It is more efficient to use a character class like [aeiou] than a +set of single-character alternatives such as (a|e|i|o|u). In general, the +simplest construction that provides the required behaviour is usually the most +efficient. Jeffrey Friedl's book contains a lot of useful general discussion +about optimizing regular expressions for efficient performance. This document +contains a few observations about PCRE2. +

+

+Using Unicode character properties (the \p, \P, and \X escapes) is slow, +because PCRE2 has to use a multi-stage table lookup whenever it needs a +character's property. If you can find an alternative pattern that does not use +character properties, it will probably be faster. +

+

+By default, the escape sequences \b, \d, \s, and \w, and the POSIX +character classes such as [:alpha:] do not use Unicode properties, partly for +backwards compatibility, and partly for performance reasons. However, you can +set the PCRE2_UCP option or start the pattern with (*UCP) if you want Unicode +character properties to be used. This can double the matching time for items +such as \d, when matched with pcre2_match(); the performance loss is +less with a DFA matching function, and in both cases there is not much +difference for \b. +

+

+When a pattern begins with .* not in atomic parentheses, nor in parentheses +that are the subject of a backreference, and the PCRE2_DOTALL option is set, +the pattern is implicitly anchored by PCRE2, since it can match only at the +start of a subject string. If the pattern has multiple top-level branches, they +must all be anchorable. The optimization can be disabled by the +PCRE2_NO_DOTSTAR_ANCHOR option, and is automatically disabled if the pattern +contains (*PRUNE) or (*SKIP). +

+

+If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, because the +dot metacharacter does not then match a newline, and if the subject string +contains newlines, the pattern may match from the character immediately +following one of them instead of from the very start. For example, the pattern +

+  .*second
+
+matches the subject "first\nand second" (where \n stands for a newline +character), with the match starting at the seventh character. In order to do +this, PCRE2 has to retry the match starting after every newline in the subject. +

+

+If you are using such a pattern with subject strings that do not contain +newlines, the best performance is obtained by setting PCRE2_DOTALL, or starting +the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE2 +from having to scan along the subject looking for a newline to restart at. +

+

+Beware of patterns that contain nested indefinite repeats. These can take a +long time to run when applied to a string that does not match. Consider the +pattern fragment +

+  ^(a+)*
+
+This can match "aaaa" in 16 different ways, and this number increases very +rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 +times, and for each of those cases other than 0 or 4, the + repeats can match +different numbers of times.) When the remainder of the pattern is such that the +entire match is going to fail, PCRE2 has in principle to try every possible +variation, and this can take an extremely long time, even for relatively short +strings. +

+

+An optimization catches some of the more simple cases such as +

+  (a+)*b
+
+where a literal character follows. Before embarking on the standard matching +procedure, PCRE2 checks that there is a "b" later in the subject string, and if +there is not, it fails the match immediately. However, when there is no +following literal this optimization cannot be used. You can see the difference +by comparing the behaviour of +
+  (a+)*\d
+
+with the pattern above. The earlier pattern, (a+)*b, gives a failure almost instantly when +applied to a whole line of "a" characters, whereas (a+)*\d takes an +appreciable time with strings longer than about 20 characters. +

+

+In many cases, the solution to this kind of performance issue is to use an +atomic group or a possessive quantifier. This can often reduce memory +requirements as well. As another example, consider this pattern: +

+  ([^<]|<(?!inet))+
+
+It matches from wherever it starts until it encounters "<inet" or the end of +the data, and is the kind of pattern that might be used when processing an XML +file. Each iteration of the outer parentheses matches either one character that +is not "<" or a "<" that is not followed by "inet". However, each time a +parenthesis is processed, a backtracking position is passed, so this +formulation uses a memory frame for each matched character. For a long string, +a lot of memory is required. Consider now this rewritten pattern, which matches +exactly the same strings: +
+  ([^<]++|<(?!inet))+
+
+This runs much faster, because sequences of characters that do not contain "<" +are "swallowed" in one item inside the parentheses, and a possessive quantifier +is used to stop any backtracking into the runs of non-"<" characters. This +version also uses a lot less memory because entry to a new set of parentheses +happens only when a "<" character that is not followed by "inet" is encountered +(and we assume this is relatively rare). +

+

+This example shows that one way of optimizing performance when matching long +subject strings is to write repeated parenthesized subpatterns to match more +than one character whenever possible. +

+
+SETTING RESOURCE LIMITS +
+

+You can set limits on the amount of processing that takes place when matching, +and on the amount of heap memory that is used. The default values of the limits +are very large, and unlikely ever to operate. They can be changed when PCRE2 is +built, and they can also be set when pcre2_match() or +pcre2_dfa_match() is called. For details of these interfaces, see the +pcre2build +documentation and the section entitled +"The match context" +in the +pcre2api +documentation. +

+

+The pcre2test test program has a modifier called "find_limits" which, if +applied to a subject line, causes it to find the smallest limits that allow a +pattern to match. This is done by repeatedly matching with different limits. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 06 December 2022 +
+Copyright © 1997-2022 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2posix.html b/3rd/pcre2/doc/html/pcre2posix.html new file mode 100644 index 00000000..bc60c3b7 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2posix.html @@ -0,0 +1,379 @@ + + +pcre2posix specification + + +

pcre2posix man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+#include <pcre2posix.h> +

+

+int pcre2_regcomp(regex_t *preg, const char *pattern, + int cflags); +
+
+int pcre2_regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[], int eflags); +
+
+size_t pcre2_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); +
+
+void pcre2_regfree(regex_t *preg); +

+
DESCRIPTION
+

+This set of functions provides a POSIX-style API for the PCRE2 regular +expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit +and 32-bit libraries. See the +pcre2api +documentation for a description of PCRE2's native API, which contains much +additional functionality. +

+

+IMPORTANT NOTE: The functions described here are NOT thread-safe, and +should not be used in multi-threaded applications. They are also limited to +processing subjects that are not bigger than 2GB. Use the native API instead. +

+

+These functions are wrapper functions that ultimately call the PCRE2 native +API. Their prototypes are defined in the pcre2posix.h header file, and +they all have unique names starting with pcre2_. However, the +pcre2posix.h header also contains macro definitions that convert the +standard POSIX names such as regcomp() into pcre2_regcomp() etc. This +means that a program can use the usual POSIX names without running the risk of +accidentally linking with POSIX functions from a different library. +

+

+On Unix-like systems the PCRE2 POSIX library is called libpcre2-posix, so +can be accessed by adding -lpcre2-posix to the command for linking an +application. Because the POSIX functions call the native ones, it is also +necessary to add -lpcre2-8. +

+

+On Windows systems, if you are linking to a DLL version of the library, it is +recommended that PCRE2POSIX_SHARED is defined before including the +pcre2posix.h header, as it will allow for a more efficient way to +invoke the functions by adding the __declspec(dllimport) decorator. +

+

+Although they were not defined as prototypes in pcre2posix.h, releases +10.33 to 10.36 of the library contained functions with the POSIX names +regcomp() etc. These simply passed their arguments to the PCRE2 +functions. These functions were provided for backwards compatibility with +earlier versions of PCRE2, which had only POSIX names. However, this has proved +troublesome in situations where a program links with several libraries, some of +which use PCRE2's POSIX interface while others use the real POSIX functions. +For this reason, the POSIX names have been removed since release 10.37. +

+

+Calling the header file pcre2posix.h avoids any conflict with other POSIX +libraries. It can, of course, be renamed or aliased as regex.h, which is +the "correct" name, if there is no clash. It provides two structure types, +regex_t for compiled internal forms, and regmatch_t for returning +captured substrings. It also defines some constants whose names start with +"REG_"; these are used for setting options and identifying error codes. +

+
USING THE POSIX FUNCTIONS
+

+Note that these functions are just POSIX-style wrappers for PCRE2's native API. +They do not give POSIX regular expression behaviour, and they are not +thread-safe or even POSIX compatible. +

+

+Those POSIX option bits that can reasonably be mapped to PCRE2 native options +have been implemented. In addition, the option REG_EXTENDED is defined with the +value zero. This has no effect, but since programs that are written to the +POSIX interface often use it, this makes it easier to slot in PCRE2 as a +replacement library. Other POSIX options are not even defined. +

+

+There are also some options that are not defined by POSIX. These have been +added at the request of users who want to make use of certain PCRE2-specific +features via the POSIX calling interface or to add BSD or GNU functionality. +

+

+When PCRE2 is called via these functions, it is only the API that is POSIX-like +in style. The syntax and semantics of the regular expressions themselves are +still those of Perl, subject to the setting of various PCRE2 options, as +described below. "POSIX-like in style" means that the API approximates to the +POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding +domains it is probably even less compatible. +

+

+The descriptions below use the actual names of the functions, but, as described +above, the standard POSIX names (without the pcre2_ prefix) may also be +used. +

+
COMPILING A PATTERN
+

+The function pcre2_regcomp() is called to compile a pattern into an +internal form. By default, the pattern is a C string terminated by a binary +zero (but see REG_PEND below). The preg argument is a pointer to a +regex_t structure that is used as a base for storing information about +the compiled regular expression. It is also used for input when REG_PEND is +set. The regex_t structure used by pcre2_regcomp() is defined in +pcre2posix.h and is not the same as the structure used by other libraries +that provide POSIX-style matching. +

+

+The argument cflags is either zero, or contains one or more of the bits +defined by the following macros: +

+  REG_DOTALL
+
+The PCRE2_DOTALL option is set when the regular expression is passed for +compilation to the native function. Note that REG_DOTALL is not part of the +POSIX standard. +
+  REG_ICASE
+
+The PCRE2_CASELESS option is set when the regular expression is passed for +compilation to the native function. +
+  REG_NEWLINE
+
+The PCRE2_MULTILINE option is set when the regular expression is passed for +compilation to the native function. Note that this does not mimic the +defined POSIX behaviour for REG_NEWLINE (see the following section). +
+  REG_NOSPEC
+
+The PCRE2_LITERAL option is set when the regular expression is passed for +compilation to the native function. This disables all meta characters in the +pattern, causing it to be treated as a literal string. The only other options +that are allowed with REG_NOSPEC are REG_ICASE, REG_NOSUB, REG_PEND, and +REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard. +
+  REG_NOSUB
+
+When a pattern that is compiled with this flag is passed to +pcre2_regexec() for matching, the nmatch and pmatch arguments +are ignored, and no captured strings are returned. Versions of the PCRE2 library +prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this +no longer happens because it disables the use of backreferences. +
+  REG_PEND
+
+If this option is set, the re_endp field in the preg structure +(which has the type const char *) must be set to point to the character beyond +the end of the pattern before calling pcre2_regcomp(). The pattern itself +may now contain binary zeros, which are treated as data characters. Without +REG_PEND, a binary zero terminates the pattern and the re_endp field is +ignored. This is a GNU extension to the POSIX standard and should be used with +caution in software intended to be portable to other systems. +
+  REG_UCP
+
+The PCRE2_UCP option is set when the regular expression is passed for +compilation to the native function. This causes PCRE2 to use Unicode properties +when matching \d, \w, etc., instead of just recognizing ASCII values. Note +that REG_UCP is not part of the POSIX standard. +
+  REG_UNGREEDY
+
+The PCRE2_UNGREEDY option is set when the regular expression is passed for +compilation to the native function. Note that REG_UNGREEDY is not part of the +POSIX standard. +
+  REG_UTF
+
+The PCRE2_UTF option is set when the regular expression is passed for +compilation to the native function. This causes the pattern itself and all data +strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF +is not part of the POSIX standard. +

+

+In the absence of these flags, no options are passed to the native function. +This means that the regex is compiled with PCRE2 default semantics. In +particular, the way it handles newline characters in the subject string is the +Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only +some of the effects specified for REG_NEWLINE. It does not affect the way +newlines are matched by the dot metacharacter (they are not) or by a negative +class such as [^a] (they are). +

+

+The yield of pcre2_regcomp() is zero on success, and non-zero otherwise. +The preg structure is filled in on success, and one other member of the +structure (as well as re_endp) is public: re_nsub contains the +number of capturing subpatterns in the regular expression. Various error codes +are defined in the header file. +

+

+NOTE: If the yield of pcre2_regcomp() is non-zero, you must not attempt +to use the contents of the preg structure. If, for example, you pass it +to pcre2_regexec(), the result is undefined and your program is likely to +crash. +

+
MATCHING NEWLINE CHARACTERS
+

+This area is not simple, because POSIX and Perl take different views of things. +It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was +never intended to be a POSIX engine. The following table lists the different +possibilities for matching newline characters in Perl and PCRE2: +

+                          Default   Change with
+
+  . matches newline          no     PCRE2_DOTALL
+  newline matches [^a]       yes    not changeable
+  $ matches \n at end        yes    PCRE2_DOLLAR_ENDONLY
+  $ matches \n in middle     no     PCRE2_MULTILINE
+  ^ matches \n in middle     no     PCRE2_MULTILINE
+
+This is the equivalent table for a POSIX-compatible pattern matcher: +
+                          Default   Change with
+
+  . matches newline          yes    REG_NEWLINE
+  newline matches [^a]       yes    REG_NEWLINE
+  $ matches \n at end        no     REG_NEWLINE
+  $ matches \n in middle     no     REG_NEWLINE
+  ^ matches \n in middle     no     REG_NEWLINE
+
+This behaviour is not what happens when PCRE2 is called via its POSIX +API. By default, PCRE2's behaviour is the same as Perl's, except that there is +no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there +is no way to stop newline from matching [^a]. +

+

+Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and +PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but there is +no way to make PCRE2 behave exactly as for the REG_NEWLINE action. When using +the POSIX API, passing REG_NEWLINE to PCRE2's pcre2_regcomp() function +causes PCRE2_MULTILINE to be passed to pcre2_compile(), and REG_DOTALL +passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY. +

+
MATCHING A PATTERN
+

+The function pcre2_regexec() is called to match a compiled pattern +preg against a given string, which is by default terminated by a +zero byte (but see REG_STARTEND below), subject to the options in eflags. +These can be: +

+  REG_NOTBOL
+
+The PCRE2_NOTBOL option is set when calling the underlying PCRE2 matching +function. +
+  REG_NOTEMPTY
+
+The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 matching +function. Note that REG_NOTEMPTY is not part of the POSIX standard. However, +setting this option can give more POSIX-like behaviour in some situations. +
+  REG_NOTEOL
+
+The PCRE2_NOTEOL option is set when calling the underlying PCRE2 matching +function. +
+  REG_STARTEND
+
+When this option is set, the subject string starts at string + +pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which +should point to the first character beyond the string. There may be binary +zeros within the subject string, and indeed, using REG_STARTEND is the only +way to pass a subject string that contains a binary zero. +

+

+Whatever the value of pmatch[0].rm_so, the offsets of the matched string +and any captured substrings are still given relative to the start of +string itself. (Before PCRE2 release 10.30 these were given relative to +string + pmatch[0].rm_so, but this differs from other +implementations.) +

+

+This is a BSD extension, compatible with but not specified by IEEE Standard +1003.2 (POSIX.2), and should be used with caution in software intended to be +portable to other systems. Note that a non-zero rm_so does not imply +REG_NOTBOL; REG_STARTEND affects only the location and length of the string, +not how it is matched. Setting REG_STARTEND and passing pmatch as NULL +are mutually exclusive; the error REG_INVARG is returned. +

+

+If the pattern was compiled with the REG_NOSUB flag, no data about any matched +strings is returned. The nmatch and pmatch arguments of +pcre2_regexec() are ignored (except possibly as input for REG_STARTEND). +

+

+The value of nmatch may be zero, and the value of pmatch may be NULL +(unless REG_STARTEND is set); in both these cases no data about any matched +strings is returned. +

+

+Otherwise, the portion of the string that was matched, and also any captured +substrings, are returned via the pmatch argument, which points to an +array of nmatch structures of type regmatch_t, containing the +members rm_so and rm_eo. These contain the byte offset to the first +character of each substring and the offset to the first character after the end +of each substring, respectively. The 0th element of the vector relates to the +entire portion of string that was matched; subsequent elements relate to +the capturing subpatterns of the regular expression. Unused entries in the +array have both structure members set to -1. +

+

+regmatch_t as well as the regoff_t typedef it uses are defined in +pcre2posix.h and are not warranted to have the same size or layout as other +similarly named types from other libraries that provide POSIX-style matching. +

+

+A successful match yields a zero return; various error codes are defined in the +header file, of which REG_NOMATCH is the "expected" failure code. +

+
ERROR MESSAGES
+

+The pcre2_regerror() function maps a non-zero errorcode from either +pcre2_regcomp() or pcre2_regexec() to a printable message. If +preg is not NULL, the error should have arisen from the use of that +structure. A message terminated by a binary zero is placed in errbuf. If +the buffer is too short, only the first errbuf_size - 1 characters of the +error message are used. The yield of the function is the size of buffer needed +to hold the whole message, including the terminating zero. This value is +greater than errbuf_size if the message was truncated. +

+
MEMORY USAGE
+

+Compiling a regular expression causes memory to be allocated and associated +with the preg structure. The function pcre2_regfree() frees all +such memory, after which preg may no longer be used as a compiled +expression. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 27 November 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2sample.html b/3rd/pcre2/doc/html/pcre2sample.html new file mode 100644 index 00000000..0903f04f --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2sample.html @@ -0,0 +1,110 @@ + + +pcre2sample specification + + +

pcre2sample man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+PCRE2 SAMPLE PROGRAM +
+

+A simple, complete demonstration program to get you started with using PCRE2 is +supplied in the file pcre2demo.c in the src directory in the PCRE2 +distribution. A listing of this program is given in the +pcre2demo +documentation. If you do not have a copy of the PCRE2 distribution, you can +save this listing to re-create the contents of pcre2demo.c. +

+

+The demonstration program compiles the regular expression that is its +first argument, and matches it against the subject string in its second +argument. No PCRE2 options are set, and default character tables are used. If +matching succeeds, the program outputs the portion of the subject that matched, +together with the contents of any captured substrings. +

+

+If the -g option is given on the command line, the program then goes on to +check for further matches of the same regular expression in the same subject +string. The logic is a little bit tricky because of the possibility of matching +an empty string. Comments in the code explain what is going on. +

+

+The code in pcre2demo.c is an 8-bit program that uses the PCRE2 8-bit +library. It handles strings and characters that are stored in 8-bit code units. +By default, one character corresponds to one code unit, but if the pattern +starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, +where characters may occupy multiple code units. +

+

+If PCRE2 is installed in the standard include and library directories for your +operating system, you should be able to compile the demonstration program using +a command like this: +

+  cc -o pcre2demo pcre2demo.c -lpcre2-8
+
+If PCRE2 is installed elsewhere, you may need to add additional options to the +command line. For example, on a Unix-like system that has PCRE2 installed in +/usr/local, you can compile the demonstration program using a command +like this: +
+  cc -o pcre2demo -I/usr/local/include pcre2demo.c -L/usr/local/lib -lpcre2-8
+
+Once you have built the demonstration program, you can run simple tests like +this: +
+  ./pcre2demo 'cat|dog' 'the cat sat on the mat'
+  ./pcre2demo -g 'cat|dog' 'the dog sat on the cat'
+
+Note that there is a much more comprehensive test program, called +pcre2test, +which supports many more facilities for testing regular expressions using all +three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be +installed). The +pcre2demo +program is provided as a relatively simple coding example. +

+

+If you try to run +pcre2demo +when PCRE2 is not installed in the standard library directory, you may get an +error like this on some operating systems (e.g. Solaris): +

+  ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory
+
+This is caused by the way shared library support works on those systems. You +need to add +
+  -R/usr/local/lib
+
+(for example) to the compile command to get round this problem. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 14 November 2023 +
+Copyright © 1997-2016 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2serialize.html b/3rd/pcre2/doc/html/pcre2serialize.html new file mode 100644 index 00000000..d189bde2 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2serialize.html @@ -0,0 +1,212 @@ + + +pcre2serialize specification + + +

pcre2serialize man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS
+

+int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); +
+
+int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); +
+
+void pcre2_serialize_free(uint8_t *bytes); +
+
+int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); +
+
+If you are running an application that uses a large number of regular +expression patterns, it may be useful to store them in a precompiled form +instead of having to compile them every time the application is run. However, +if you are using the just-in-time optimization feature, it is not possible to +save and reload the JIT data, because it is position-dependent. The host on +which the patterns are reloaded must be running the same version of PCRE2, with +the same code unit width, and must also have the same endianness, pointer width +and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using +PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be +reloaded using the 8-bit library. +

+

+Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET serialization. The serialized output is +really just a bytecode dump, which is why it can only be reloaded in the same +environment as the one that created it. Hence the restrictions mentioned above. +Applications that are not statically linked with a fixed version of PCRE2 must +be prepared to recompile patterns from their sources, in order to be immune to +PCRE2 upgrades. +

+
SECURITY CONCERNS
+

+The facility for saving and restoring compiled patterns is intended for use +within individual applications. As such, the data supplied to +pcre2_serialize_decode() is expected to be trusted data, not data from +arbitrary external sources. There is only some simple consistency checking, not +complete validation of what is being re-loaded. Corrupted data may cause +undefined results. For example, if the length field of a pattern in the +serialized data is corrupted, the deserializing code may read beyond the end of +the byte stream that is passed to it. +

+
SAVING COMPILED PATTERNS
+

+Before compiled patterns can be saved they must be serialized, which in PCRE2 +means converting the pattern to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). For more details of character tables, see the +section on locale support +in the +pcre2api +documentation. +

+

+The function pcre2_serialize_encode() creates a serialized byte stream +from a list of compiled patterns. Its first two arguments specify the list, +being a pointer to a vector of pointers to compiled patterns, and the length of +the vector. The third and fourth arguments point to variables which are set to +point to the created byte stream and its length, respectively. The final +argument is a pointer to a general context, which can be used to specify custom +memory management functions. If this argument is NULL, malloc() is used +to obtain memory for the byte stream. The yield of the function is the number +of serialized patterns, or one of the following negative error codes: +

+  PCRE2_ERROR_BADDATA      the number of patterns is zero or less
+  PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
+  PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
+  PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
+
+PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +

+

+Once a set of patterns has been serialized you can save the data in any +appropriate manner. Here is sample code that compiles two patterns and writes +them to a file. It assumes that the variable fd refers to a file that is +open for output. The error checking that should be present in a real +application has been omitted for simplicity. +

+  int errorcode;
+  uint8_t *bytes;
+  PCRE2_SIZE erroroffset;
+  PCRE2_SIZE bytescount;
+  pcre2_code *list_of_codes[2];
+  list_of_codes[0] = pcre2_compile("first pattern",
+    PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL);
+  list_of_codes[1] = pcre2_compile("second pattern",
+    PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL);
+  errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes,
+    &bytescount, NULL);
+  errorcode = fwrite(bytes, 1, bytescount, fd);
+
+Note that the serialized data is binary data that may contain any of the 256 +possible byte values. On systems that make a distinction between binary and +non-binary data, be sure that the file is opened for binary output. +

+

+Serializing a set of patterns leaves the original data untouched, so they can +still be used for matching. Their memory must eventually be freed in the usual +way by calling pcre2_code_free(). When you have finished with the byte +stream, it too must be freed by calling pcre2_serialize_free(). If this +function is called with a NULL argument, it returns immediately without doing +anything. +

+
RE-USING PRECOMPILED PATTERNS
+

+In order to re-use a set of saved patterns you must first make the serialized +byte stream available in main memory (for example, by reading from a file). The +management of this memory block is up to the application. You can use the +pcre2_serialize_get_number_of_codes() function to find out how many +compiled patterns are in the serialized data without actually decoding the +patterns: +

+  uint8_t *bytes = <serialized data>;
+  int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes);
+
+The pcre2_serialize_decode() function reads a byte stream and recreates +the compiled patterns in new memory blocks, setting pointers to them in a +vector. The first two arguments are a pointer to a suitable vector and its +length, and the third argument points to a byte stream. The final argument is a +pointer to a general context, which can be used to specify custom memory +management functions for the decoded patterns. If this argument is NULL, +malloc() and free() are used. After deserialization, the byte +stream is no longer needed and can be discarded. +
+  pcre2_code *list_of_codes[2];
+  uint8_t *bytes = <serialized data>;
+  int32_t number_of_codes =
+    pcre2_serialize_decode(list_of_codes, 2, bytes, NULL);
+
+If the vector is not large enough for all the patterns in the byte stream, it +is filled with those that fit, and the remainder are ignored. The yield of the +function is the number of decoded patterns, or one of the following negative +error codes: +
+  PCRE2_ERROR_BADDATA    second argument is zero or less
+  PCRE2_ERROR_BADMAGIC   mismatch of id bytes in the data
+  PCRE2_ERROR_BADMODE    mismatch of code unit size or PCRE2 version
+  PCRE2_ERROR_BADSERIALIZEDDATA  other sanity check failure
+  PCRE2_ERROR_NOMEMORY   memory allocation failed
+  PCRE2_ERROR_NULL       first or third argument is NULL
+
+PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +

+

+Decoded patterns can be used for matching in the usual way, and must be freed +by calling pcre2_code_free(). However, be aware that there is a potential +race issue if you are using multiple patterns that were decoded from a single +byte stream in a multithreaded application. A single copy of the character +tables is used by all the decoded patterns and a reference count is used to +arrange for its memory to be automatically freed when the last pattern is +freed, but there is no locking on this reference count. Therefore, if you want +to call pcre2_code_free() for these patterns in different threads, you +must arrange your own locking, and ensure that pcre2_code_free() cannot +be called by two threads at the same time. +

+

+If a pattern was processed by pcre2_jit_compile() before being +serialized, the JIT data is discarded and so is no longer available after a +save/restore cycle. You can, however, process a restored pattern with +pcre2_jit_compile() if you wish. +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 19 January 2024 +
+Copyright © 1997-2018 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2syntax.html b/3rd/pcre2/doc/html/pcre2syntax.html new file mode 100644 index 00000000..46da3d71 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2syntax.html @@ -0,0 +1,754 @@ + + +pcre2syntax specification + + +

pcre2syntax man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY
+

+The full syntax and semantics of the regular expression patterns that are +supported by PCRE2 are described in the +pcre2pattern +documentation. This document contains a quick-reference summary of the pattern +syntax followed by the syntax of replacement strings in the substitution function. +The full description of the latter is in the +pcre2api +documentation. +

+
QUOTING
+

+

+  \x         where x is non-alphanumeric is a literal x
+  \Q...\E    treat enclosed characters as literal
+
+Note that white space inside \Q...\E is always treated as literal, even if +PCRE2_EXTENDED is set, causing most other white space to be ignored. Note also +that PCRE2's handling of \Q...\E has some differences from Perl's. See the +pcre2pattern +documentation for details. +

+
BRACED ITEMS
+

+With one exception, wherever brace characters { and } are required to enclose +data for constructions such as \g{2} or \k{name}, space and/or horizontal tab +characters that follow { or precede } are allowed and are ignored. In the case +of quantifiers, they may also appear before or after the comma. The exception +is \u{...} which is not Perl-compatible and is recognized only when +PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript compatibility feature, and +follows ECMAScript's behaviour. +

+
ESCAPED CHARACTERS
+

+This table applies to ASCII and Unicode environments. An unrecognized escape +sequence causes an error. +

+  \a         alarm, that is, the BEL character (hex 07)
+  \cx        "control-x", where x is a non-control ASCII character
+  \e         escape (hex 1B)
+  \f         form feed (hex 0C)
+  \n         newline (hex 0A)
+  \r         carriage return (hex 0D)
+  \t         tab (hex 09)
+  \0dd       character with octal code 0dd
+  \ddd       character with octal code ddd, or backreference
+  \o{ddd..}  character with octal code ddd..
+  \N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
+  \xhh       character with hex code hh
+  \x{hh..}   character with hex code hh..
+
+\N{U+hh..} is synonymous with \x{hh..} but is not supported in environments +that use EBCDIC code (mainly IBM mainframes). Note that \N not followed by an +opening curly bracket has a different meaning (see below). +

+

+If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the +following are also recognized: +

+  \U         the character "U"
+  \uhhhh     character with hex code hhhh
+  \u{hh..}   character with hex code hh.. but only for EXTRA_ALT_BSUX
+
+When \x is not followed by {, one or two hexadecimal digits are read, +but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be +recognized as a hexadecimal escape; otherwise it matches a literal "x". +Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits +or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it +matches a literal "u". +

+

+Note that \0dd is always an octal code. The treatment of backslash followed by +a non-zero digit is complicated; for details see the section +"Non-printing characters" +in the +pcre2pattern +documentation, where details of escape processing in EBCDIC environments are +also given. +

+
CHARACTER TYPES
+

+

+  .          any character except newline;
+               in dotall mode, any character whatsoever
+  \C         one code unit, even in UTF mode (best avoided)
+  \d         a decimal digit
+  \D         a character that is not a decimal digit
+  \h         a horizontal white space character
+  \H         a character that is not a horizontal white space character
+  \N         a character that is not a newline
+  \p{xx}     a character with the xx property
+  \P{xx}     a character without the xx property
+  \R         a newline sequence
+  \s         a white space character
+  \S         a character that is not a white space character
+  \v         a vertical white space character
+  \V         a character that is not a vertical white space character
+  \w         a "word" character
+  \W         a "non-word" character
+  \X         a Unicode extended grapheme cluster
+
+\C is dangerous because it may leave the current matching point in the middle +of a UTF-8 or UTF-16 character. The application can lock out the use of \C by +setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 +with the use of \C permanently disabled. +

+

+By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode +or in the 16-bit and 32-bit libraries. However, if locale-specific matching is +happening, \s and \w may also match characters with code points in the range +128-255. If the PCRE2_UCP option is set, the behaviour of these escape +sequences is changed to use Unicode properties and they match many more +characters, but there are some option settings that can restrict individual +sequences to matching only ASCII characters. +

+

+Property descriptions in \p and \P are matched caselessly; hyphens, +underscores, and ASCII white space characters are ignored, in accordance with +Unicode's "loose matching" rules. For example, \p{Bidi_Class=al} is the same +as \p{ bidi class = AL }. +

+
GENERAL CATEGORY PROPERTIES FOR \p and \P
+

+

+  C          Other
+  Cc         Control
+  Cf         Format
+  Cn         Unassigned
+  Co         Private use
+  Cs         Surrogate
+
+  L          Letter
+  Lc         Cased letter, the union of Ll, Lu, and Lt
+  L&         Synonym of Lc
+  Ll         Lower case letter
+  Lm         Modifier letter
+  Lo         Other letter
+  Lt         Title case letter
+  Lu         Upper case letter
+
+  M          Mark
+  Mc         Spacing mark
+  Me         Enclosing mark
+  Mn         Non-spacing mark
+
+  N          Number
+  Nd         Decimal number
+  Nl         Letter number
+  No         Other number
+
+  P          Punctuation
+  Pc         Connector punctuation
+  Pd         Dash punctuation
+  Pe         Close punctuation
+  Pf         Final punctuation
+  Pi         Initial punctuation
+  Po         Other punctuation
+  Ps         Open punctuation
+
+  S          Symbol
+  Sc         Currency symbol
+  Sk         Modifier symbol
+  Sm         Mathematical symbol
+  So         Other symbol
+
+  Z          Separator
+  Zl         Line separator
+  Zp         Paragraph separator
+  Zs         Space separator
+
+From release 10.45, when caseless matching is set, Ll, Lu, and Lt are all +equivalent to Lc. +

+
PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P
+

+

+  Xan        Alphanumeric: union of properties L and N
+  Xps        POSIX space: property Z or tab, NL, VT, FF, CR
+  Xsp        Perl space: property Z or tab, NL, VT, FF, CR
+  Xuc        Universally-named character: one that can be
+               represented by a Universal Character Name
+  Xwd        Perl word: property Xan or underscore
+
+Perl and POSIX space are now the same. Perl added VT to its space character set +at release 5.18. +

+
BINARY PROPERTIES FOR \p AND \P
+

+Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\p and \P, along with their abbreviations, by running this command: +

+  pcre2test -LP
+
+

+
SCRIPT MATCHING WITH \p AND \P
+

+Many script names and their 4-letter abbreviations are recognized in +\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of +course). You can obtain a list of these scripts by running this command: +

+  pcre2test -LS
+
+

+
THE BIDI_CLASS PROPERTY FOR \p AND \P
+

+

+  \p{Bidi_Class:<class>}   matches a character with the given class
+  \p{BC:<class>}           matches a character with the given class
+
+The recognized classes are: +
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+
+

+
CHARACTER CLASSES
+

+

+  [...]       positive character class
+  [^...]      negative character class
+  [x-y]       range (can be used for hex characters)
+  [[:xxx:]]   positive POSIX named set
+  [[:^xxx:]]  negative POSIX named set
+
+  alnum       alphanumeric
+  alpha       alphabetic
+  ascii       0-127
+  blank       space or tab
+  cntrl       control character
+  digit       decimal digit
+  graph       printing, excluding space
+  lower       lower case letter
+  print       printing, including space
+  punct       printing, excluding alphanumeric
+  space       white space
+  upper       upper case letter
+  word        same as \w
+  xdigit      hexadecimal digit
+
+In PCRE2, POSIX character set names recognize only ASCII characters by default, +but some of them use Unicode properties if PCRE2_UCP is set. You can use +\Q...\E inside a character class. +

+

+When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes may be +used, allowing nested character classes, combined using set operators. +

+  [x&&[^y]]   UTS#18 extended character class
+
+  x||y        set union (OR)
+  x&&y        set intersection (AND)
+  x--y        set difference (AND NOT)
+  x~~y        set symmetric difference (XOR)
+
+
+

+
PERL EXTENDED CHARACTER CLASSES
+

+

+  (?[...])                Perl extended character class
+  (?[\p{Thai} & \p{Nd}])  operators; whitespace ignored
+  (?[(x - y) & z])        parentheses for grouping
+
+  (?[ [^3] & \p{Nd} ])    [...] is a nested ordinary class
+  (?[ [:alpha:] - [z] ])  POSIX set is allowed outside [...]
+  (?[ \d - [3] ])         backslash-escaped set is allowed outside [...]
+  (?[ !\n & [:ascii:] ])  backslash-escaped character is allowed outside [...]
+                      all other characters or ranges must be enclosed in [...]
+
+  x|y, x+y                set union (OR)
+  x&y                     set intersection (AND)
+  x-y                     set difference (AND NOT)
+  x^y                     set symmetric difference (XOR)
+  !x                      set complement (NOT)
+
+Inside a Perl extended character class, [...] switches mode to be interpreted +as an ordinary character class. Outside of a nested [...], the only items +permitted are backslash-escapes, POSIX sets, operators, and parentheses. Inside +a nested ordinary class, ^ has its usual meaning (inverts the class when used +as the first character); outside of a nested class, ^ is the XOR operator. +

+
QUANTIFIERS
+

+

+  ?           0 or 1, greedy
+  ?+          0 or 1, possessive
+  ??          0 or 1, lazy
+  *           0 or more, greedy
+  *+          0 or more, possessive
+  *?          0 or more, lazy
+  +           1 or more, greedy
+  ++          1 or more, possessive
+  +?          1 or more, lazy
+  {n}         exactly n
+  {n,m}       at least n, no more than m, greedy
+  {n,m}+      at least n, no more than m, possessive
+  {n,m}?      at least n, no more than m, lazy
+  {n,}        n or more, greedy
+  {n,}+       n or more, possessive
+  {n,}?       n or more, lazy
+  {,m}        zero up to m, greedy
+  {,m}+       zero up to m, possessive
+  {,m}?       zero up to m, lazy
+
+

+
ANCHORS AND SIMPLE ASSERTIONS
+

+

+  \b          word boundary
+  \B          not a word boundary
+  ^           start of subject
+                also after an internal newline in multiline mode
+                (after any newline if PCRE2_ALT_CIRCUMFLEX is set)
+  \A          start of subject
+  $           end of subject
+                also before newline at end of subject
+                also before internal newline in multiline mode
+  \Z          end of subject
+                also before newline at end of subject
+  \z          end of subject
+  \G          first matching position in subject
+
+

+
REPORTED MATCH POINT SETTING
+

+

+  \K          set reported start of match
+
+From release 10.38 \K is not permitted by default in lookaround assertions, +for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option is set, the previous behaviour is re-enabled. When this option is set, +\K is honoured in positive assertions, but ignored in negative ones. +

+
ALTERNATION
+

+

+  expr|expr|expr...
+
+

+
CAPTURING
+

+

+  (...)           capture group
+  (?<name>...)    named capture group (Perl)
+  (?'name'...)    named capture group (Perl)
+  (?P<name>...)   named capture group (Python)
+  (?:...)         non-capture group
+  (?|...)         non-capture group; reset group numbers for
+                   capture groups in each alternative
+
+In non-UTF modes, names may contain underscores and ASCII letters and digits; +in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In +both cases, a name must not start with a digit. +

+
ATOMIC GROUPS
+

+

+  (?>...)         atomic non-capture group
+  (*atomic:...)   atomic non-capture group
+
+

+
COMMENT
+

+

+  (?#....)        comment (not nestable)
+
+

+
OPTION SETTING
+

+Changes of these options within a group are automatically cancelled at the end +of the group. +

+  (?a)            all ASCII options
+  (?aD)           restrict \d to ASCII in UCP mode
+  (?aS)           restrict \s to ASCII in UCP mode
+  (?aW)           restrict \w to ASCII in UCP mode
+  (?aP)           restrict all POSIX classes to ASCII in UCP mode
+  (?aT)           restrict POSIX digit classes to ASCII in UCP mode
+  (?i)            caseless
+  (?J)            allow duplicate named groups
+  (?m)            multiline
+  (?n)            no auto capture
+  (?r)            restrict caseless to either ASCII or non-ASCII
+  (?s)            single line (dotall)
+  (?U)            default ungreedy (lazy)
+  (?x)            ignore white space except in classes or \Q...\E
+  (?xx)           as (?x) but also ignore space and tab in classes
+  (?-...)         unset the given option(s)
+  (?^)            unset imnrsx options
+
+(?aP) implies (?aT) as well, though this has no additional effect. However, it +means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for +POSIX classes. +

+

+Unsetting x or xx unsets both. Several options may be set at once, and a +mixture of setting and unsetting such as (?i-x) is allowed, but there may be +only one hyphen. Setting (but not unsetting) is allowed after (?^, for example +(?^in). An option setting may appear at the start of a non-capture group, for +example (?i:...). +

+

+The following are recognized only at the very start of a pattern or after one +of the newline or \R sequences or options with similar syntax. More than one +of them may appear. For the first three, d is a decimal number. +

+  (*LIMIT_DEPTH=d)     set the backtracking limit to d
+  (*LIMIT_HEAP=d)      set the heap size limit to d * 1024 bytes
+  (*LIMIT_MATCH=d)     set the match limit to d
+  (*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching
+  (*NOTEMPTY)          set PCRE2_NOTEMPTY when matching
+  (*NOTEMPTY_ATSTART)  set PCRE2_NOTEMPTY_ATSTART when matching
+  (*NO_AUTO_POSSESS)   no auto-possessification (PCRE2_NO_AUTO_POSSESS)
+  (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR)
+  (*NO_JIT)            disable JIT optimization
+  (*NO_START_OPT)      no start-match optimization (PCRE2_NO_START_OPTIMIZE)
+  (*TURKISH_CASING)    set PCRE2_EXTRA_TURKISH_CASING when matching
+  (*UTF)               set appropriate UTF mode for the library in use
+  (*UCP)               set PCRE2_UCP (use Unicode properties for \d etc)
+
+Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of +the limits set by the caller of pcre2_match() or pcre2_dfa_match(), +not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The +application can lock out the use of (*UTF) and (*UCP) by setting the +PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time. +

+
NEWLINE CONVENTION
+

+These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +

+  (*CR)           carriage return only
+  (*LF)           linefeed only
+  (*CRLF)         carriage return followed by linefeed
+  (*ANYCRLF)      all three of the above
+  (*ANY)          any Unicode newline sequence
+  (*NUL)          the NUL character (binary zero)
+
+

+
WHAT \R MATCHES
+

+These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +

+  (*BSR_ANYCRLF)  CR, LF, or CRLF
+  (*BSR_UNICODE)  any Unicode newline sequence
+
+

+
LOOKAHEAD AND LOOKBEHIND ASSERTIONS
+

+

+  (?=...)                     )
+  (*pla:...)                  ) positive lookahead
+  (*positive_lookahead:...)   )
+
+  (?!...)                     )
+  (*nla:...)                  ) negative lookahead
+  (*negative_lookahead:...)   )
+
+  (?<=...)                    )
+  (*plb:...)                  ) positive lookbehind
+  (*positive_lookbehind:...)  )
+
+  (?<!...)                    )
+  (*nlb:...)                  ) negative lookbehind
+  (*negative_lookbehind:...)  )
+
+Each top-level branch of a lookbehind must have a limit for the number of +characters it matches. If any branch can match a variable number of characters, +the maximum for each branch is limited to a value set by the caller of +pcre2_compile() or defaulted. The default is set when PCRE2 is built +(ultimate default 255). If every branch matches a fixed number of characters, +the limit for each branch is 65535 characters. +

+
NON-ATOMIC LOOKAROUND ASSERTIONS
+

+These assertions are specific to PCRE2 and are not Perl-compatible. +

+  (?*...)                                )
+  (*napla:...)                           ) synonyms
+  (*non_atomic_positive_lookahead:...)   )
+
+  (?<*...)                               )
+  (*naplb:...)                           ) synonyms
+  (*non_atomic_positive_lookbehind:...)  )
+
+

+
SUBSTRING SCAN ASSERTION
+

+This feature is not Perl-compatible. +

+  (*scan_substring:(grouplist)...)  scan captured substring
+  (*scs:(grouplist)...)             scan captured substring
+
+The comma-separated list may identify groups in any of the following ways: +
+  n       absolute reference
+  +n      relative reference
+  -n      relative reference
+  <name>  name
+  'name'  name
+
+
+

+
SCRIPT RUNS
+

+

+  (*script_run:...)           ) script run, can be backtracked into
+  (*sr:...)                   )
+
+  (*atomic_script_run:...)    ) atomic script run
+  (*asr:...)                  )
+
+

+
BACKREFERENCES
+

+

+  \n              reference by number (can be ambiguous)
+  \gn             reference by number
+  \g{n}           reference by number
+  \g+n            relative reference by number (PCRE2 extension)
+  \g-n            relative reference by number
+  \g{+n}          relative reference by number (PCRE2 extension)
+  \g{-n}          relative reference by number
+  \k<name>        reference by name (Perl)
+  \k'name'        reference by name (Perl)
+  \g{name}        reference by name (Perl)
+  \k{name}        reference by name (.NET)
+  (?P=name)       reference by name (Python)
+
+

+
SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)
+

+

+  (?R)            recurse whole pattern
+  (?n)            call subroutine by absolute number
+  (?+n)           call subroutine by relative number
+  (?-n)           call subroutine by relative number
+  (?&name)        call subroutine by name (Perl)
+  (?P>name)       call subroutine by name (Python)
+  \g<name>        call subroutine by name (Oniguruma)
+  \g'name'        call subroutine by name (Oniguruma)
+  \g<n>           call subroutine by absolute number (Oniguruma)
+  \g'n'           call subroutine by absolute number (Oniguruma)
+  \g<+n>          call subroutine by relative number (PCRE2 extension)
+  \g'+n'          call subroutine by relative number (PCRE2 extension)
+  \g<-n>          call subroutine by relative number (PCRE2 extension)
+  \g'-n'          call subroutine by relative number (PCRE2 extension)
+
+

+
CONDITIONAL PATTERNS
+

+

+  (?(condition)yes-pattern)
+  (?(condition)yes-pattern|no-pattern)
+
+  (?(n)               absolute reference condition
+  (?(+n)              relative reference condition (PCRE2 extension)
+  (?(-n)              relative reference condition (PCRE2 extension)
+  (?(<name>)          named reference condition (Perl)
+  (?('name')          named reference condition (Perl)
+  (?(name)            named reference condition (PCRE2, deprecated)
+  (?(R)               overall recursion condition
+  (?(Rn)              specific numbered group recursion condition
+  (?(R&name)          specific named group recursion condition
+  (?(DEFINE)          define groups for reference
+  (?(VERSION[>]=n.m)  test PCRE2 version
+  (?(assert)          assertion condition
+
+Note the ambiguity of (?(R) and (?(Rn) which might be named reference +conditions or recursion tests. Such a condition is interpreted as a reference +condition if the relevant named group exists. +

+
BACKTRACKING CONTROL
+

+All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the +name is mandatory; for the others it is optional. (*SKIP) changes its behaviour +if :NAME is present. The others just set a name for passing back to the caller, +but this is not a name that (*SKIP) can see. The following act as soon as they +are reached: +

+  (*ACCEPT)       force successful match
+  (*FAIL)         force backtrack; synonym (*F)
+  (*MARK:NAME)    set name to be passed back; synonym (*:NAME)
+
+The following act only when a subsequent match failure causes a backtrack to +reach them. They all force a match failure, but they differ in what happens +afterwards. Those that advance the start-of-match point do so only if the +pattern is not anchored. +
+  (*COMMIT)       overall failure, no advance of starting point
+  (*PRUNE)        advance to next starting character
+  (*SKIP)         advance to current matching position
+  (*SKIP:NAME)    advance to position corresponding to an earlier
+                  (*MARK:NAME); if not found, the (*SKIP) is ignored
+  (*THEN)         local failure, backtrack to next alternation
+
+The effect of one of these verbs in a group called as a subroutine is confined +to the subroutine call. +

+
CALLOUTS
+

+

+  (?C)            callout (assumed number 0)
+  (?Cn)           callout with numerical data n
+  (?C"text")      callout with string data
+
+The allowed string delimiters are ` ' " ^ % # $ (which are the same for the +start and the end), and the starting delimiter { matched with the ending +delimiter }. To encode the ending delimiter within the string, double it. +

+
REPLACEMENT STRINGS
+

+If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for +pcre2_substitute() is not interpreted. Otherwise, by default, the only +special character is the dollar character in one of the following forms: +

+  $$                  insert a dollar character
+  $n or ${n}          insert the contents of group n
+  $<name>             insert the contents of named group
+  $0 or $&            insert the entire matched substring
+  $`                  insert the substring that precedes the match
+  $'                  insert the substring that follows the match
+  $_                  insert the entire input string
+  $*MARK or ${*MARK}  insert a control verb name
+
+For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is set, +there is additional interpretation: +

+

+1. Backslash is an escape character, and the forms described in "ESCAPED +CHARACTERS" above are recognized. Also: +

+  \Q...\E   can be used to suppress interpretation
+  \l        force the next character to lower case
+  \u        force the next character to upper case
+  \L        force subsequent characters to lower case
+  \U        force subsequent characters to upper case
+  \u\L      force next character to upper case, then all lower
+  \l\U      force next character to lower case, then all upper
+  \E        end \L or \U case forcing
+  \b        backspace character (note: as in character class in pattern)
+  \v        vertical tab character (note: not the same as in a pattern)
+
+2. The Python form \g<n>, where the angle brackets are part of the syntax and +n is either a group name or a number, is recognized as an alternative way +of inserting the contents of a group, for example \g<3>. +

+

+3. Capture substitution supports the following additional forms: +

+  ${n:-string}             default for unset group
+  ${n:+string1:string2}    values for set/unset group
+
+The substitution strings themselves are expanded. Backslash can be used to +escape colons and closing curly brackets. +

+
SEE ALSO
+

+pcre2pattern(3), pcre2api(3), pcre2callout(3), +pcre2matching(3), pcre2(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 27 November 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2test.html b/3rd/pcre2/doc/html/pcre2test.html new file mode 100644 index 00000000..db9073f0 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2test.html @@ -0,0 +1,2273 @@ + + +pcre2test specification + + +

pcre2test man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+

+
SYNOPSIS
+

+pcre2test [options] [input file [output file]] +
+
+pcre2test is a test program for the PCRE2 regular expression libraries, +but it can also be used for experimenting with regular expressions. This +document describes the features of the test program; for details of the regular +expressions themselves, see the +pcre2pattern +documentation. For details of the PCRE2 library function calls and their +options, see the +pcre2api +documentation. +

+

+The input for pcre2test is a sequence of regular expression patterns and +subject strings to be matched. There are also command lines for setting +defaults and controlling some special actions. The output shows the result of +each match attempt. Modifiers on external or internal command lines, the +patterns, and the subject lines specify PCRE2 function options, control how the +subject is processed, and what output is produced. +

+

+There are many obscure modifiers, some of which are specifically designed for +use in conjunction with the test script and data files that are distributed as +part of PCRE2. All the modifiers are documented here, some without much +justification, but many of them are unlikely to be of use except when testing +the libraries. +

+
PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES
+

+Different versions of the PCRE2 library can be built to support character +strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or +all three of these libraries may be simultaneously installed. The +pcre2test program can be used to test all the libraries. However, its own +input and output are always in 8-bit format. When testing the 16-bit or 32-bit +libraries, patterns and subject strings are converted to 16-bit or 32-bit +format before being passed to the library functions. Results are converted back +to 8-bit code units for output. +

+

+In the rest of this document, the names of library functions and structures +are given in generic form, for example, pcre2_compile(). The actual +names used in the libraries have a suffix _8, _16, or _32, as appropriate. +

+
INPUT ENCODING
+

+Input to pcre2test is processed line by line, either by calling the C +library's fgets() function, or via the libreadline or libedit +library. In some Windows environments character 26 (hex 1A) causes an immediate +end of file, and no further data is read, so this character should be avoided +unless you really want that action. +

+

+The input is processed using C's string functions, so must not contain binary +zeros, even though in Unix-like environments, fgets() treats any bytes +other than newline as data characters. An error is generated if a binary zero +is encountered. By default subject lines are processed for backslash escapes, +which makes it possible to include any data value in strings that are passed to +the library for matching. For patterns, there is a facility for specifying some +or all of the 8-bit input characters as hexadecimal pairs, which makes it +possible to include binary zeros. +

+
+Input for the 16-bit and 32-bit libraries +
+

+When testing the 16-bit or 32-bit libraries, there is a need to be able to +generate character code points greater than 255 in the strings that are passed +to the library. For subject lines and some patterns, backslash escapes can be +used. In addition, when the utf modifier (see +"Setting compilation options" +below) is set, the pattern and any following subject lines are interpreted as +UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. +

+

+For non-UTF testing of wide characters, the utf8_input modifier can be +used. This is mutually exclusive with utf, and is allowed only in 16-bit +or 32-bit mode. It causes the pattern and following subject lines to be treated +as UTF-8 according to the original definition (RFC 2279), which allows for +character values up to 0x7fffffff. Each character is placed in one 16-bit or +32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error +to occur). +

+

+UTF-8 (in its original definition) is not capable of encoding values greater +than 0x7fffffff, but such values can be handled by the 32-bit library. When +testing this library in non-UTF mode with utf8_input set, if any +character is preceded by the byte 0xff (which is an invalid byte in UTF-8) +0x80000000 is added to the character's value. For subject strings, using an +escape sequence is preferable. +

+
COMMAND LINE OPTIONS
+

+-8 +If the 8-bit library has been built, this option causes it to be used (this is +the default). If the 8-bit library has not been built, this option causes an +error. +

+

+-16 +If the 16-bit library has been built, this option causes it to be used. If the +8-bit library has not been built, this is the default. If the 16-bit library +has not been built, this option causes an error. +

+

+-32 +If the 32-bit library has been built, this option causes it to be used. If no +other library has been built, this is the default. If the 32-bit library has +not been built, this option causes an error. +

+

+-ac +Behave as if each pattern has the auto_callout modifier, that is, insert +automatic callouts into every pattern that is compiled. +

+

+-AC +As for -ac, but in addition behave as if each subject line has the +callout_extra modifier, that is, show additional information from +callouts. +

+

+-b +Behave as if each pattern has the fullbincode modifier; the full +internal binary form of the pattern is output after compilation. +

+

+-C +Output the version number of the PCRE2 library, and all available information +about the optional features that are included, and then exit with zero exit +code. All other options are ignored. If both -C and any -Lx options are present, +whichever is first is recognized. +

+

+-C option +Output information about a specific build-time option, then exit. This +functionality is intended for use in scripts such as RunTest. The +following options output the value and set the exit code as indicated: +

+  ebcdic-nl  the code for LF (= NL) in an EBCDIC environment:
+               either 0x15 or 0x25
+               0 if used in an ASCII/Unicode environment
+               exit code is always 0
+  linksize   the configured internal link size (2, 3, or 4)
+               exit code is set to the link size
+  newline    the default newline setting:
+               CR, LF, CRLF, ANYCRLF, ANY, or NUL
+               exit code is always 0
+  bsr        the default setting for what \R matches:
+               ANYCRLF or ANY
+               exit code is always 0
+
+The following options output 1 for true or 0 for false, and set the exit code +to the same value: +
+  backslash-C  \C is supported (not locked out)
+  ebcdic       compiled for an EBCDIC environment
+  jit          just-in-time support is available
+  pcre2-16     the 16-bit library was built
+  pcre2-32     the 32-bit library was built
+  pcre2-8      the 8-bit library was built
+  unicode      Unicode support is available
+
+Note that the availability of JIT support in the library does not guarantee +that it can actually be used because in some environments it is unable to +allocate executable memory. The option "jitusable" gives more detailed +information. It returns one of the following values: +
+  0  JIT is available and usable
+  1  JIT is available but cannot allocate executable memory
+  2  JIT is not available
+  3  Unexpected return from test call to pcre2_jit_compile()
+
+If an unknown option is given, an error message is output; the exit code is 0. +

+

+-d +Behave as if each pattern has the debug modifier; the internal +form and information about the compiled pattern is output after compilation; +-d is equivalent to -b -i. +

+

+-dfa +Behave as if each subject line has the dfa modifier; matching is done +using the pcre2_dfa_match() function instead of the default +pcre2_match(). +

+

+-error number[,number,...] +Call pcre2_get_error_message() for each of the error numbers in the +comma-separated list, display the resulting messages on the standard output, +then exit with zero exit code. The numbers may be positive or negative. This is +a convenience facility for PCRE2 maintainers. +

+

+-help +Output a brief summary of these options and then exit. +

+

+-i +Behave as if each pattern has the info modifier; information about the +compiled pattern is given after compilation. +

+

+-jit +Behave as if each pattern line has the jit modifier; after successful +compilation, each pattern is passed to the just-in-time compiler, if available. +

+

+-jitfast +Behave as if each pattern line has the jitfast modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and each subject line is passed directly to the JIT matcher via its +"fast path". +

+

+-jitverify +Behave as if each pattern line has the jitverify modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and the use of JIT for matching is verified. +

+

+-LM +List modifiers: write a list of available pattern and subject modifiers to the +standard output, then exit with zero exit code. All other options are ignored. +If both -C and any -Lx options are present, whichever is first is recognized. +

+

+-LP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +

+

+-LS +List scripts: write a list of recognized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +

+

+-pattern modifier-list +Behave as if each pattern line contains the given modifiers. +

+

+-q +Do not output the version number of pcre2test at the start of execution. +

+

+-S size +On Unix-like systems, set the size of the run-time stack to size +mebibytes (units of 1024*1024 bytes). +

+

+-subject modifier-list +Behave as if each subject line contains the given modifiers. +

+

+-t +Run each compile and match many times with a timer, and output the resulting +times per compile or match. When JIT is used, separate times are given for the +initial compile and the JIT compile. You can control the number of iterations +that are used for timing by following -t with a number (as a separate +item on the command line). For example, "-t 1000" iterates 1000 times. The +default is to iterate 500,000 times. +

+

+-tm +This is like -t except that it times only the matching phase, not the +compile phase. +

+

+-T -TM +These behave like -t and -tm, but in addition, at the end of a run, +the total times for all compiles and matches are output. +

+

+-version +Output the PCRE2 version number and then exit. +

+
DESCRIPTION
+

+If pcre2test is given two filename arguments, it reads from the first and +writes to the second. If the first name is "-", input is taken from the +standard input. If pcre2test is given only one argument, it reads from +that file and writes to stdout. Otherwise, it reads from stdin and writes to +stdout. +

+

+When pcre2test is built, a configuration option can specify that it +should be linked with the libreadline or libedit library. When this +is done, if the input is from a terminal, it is read using the readline() +function. This provides line-editing and history facilities. The output from +the -help option states whether or not readline() will be used. +

+

+The program handles any number of tests, each of which consists of a set of +input lines. Each set starts with a regular expression pattern, followed by any +number of subject lines to be matched against that pattern. In between sets of +test data, command lines that begin with # may appear. This file format, with +some restrictions, can also be processed by the perltest.sh script that +is distributed with PCRE2 as a means of checking that the behaviour of PCRE2 +and Perl is the same. For a specification of perltest.sh, see the +comments near its beginning. See also the #perltest command below. +

+

+When the input is a terminal, pcre2test prompts for each line of input, +using "re>" to prompt for regular expression patterns, and "data>" to prompt +for subject lines. Command lines starting with # can be entered only in +response to the "re>" prompt. +

+

+Each subject line is matched separately and independently. If you want to do +multi-line matches, you have to use the \n escape sequence (or \r or \r\n, +etc., depending on the newline setting) in a single line of input to encode the +newline sequences. There is no limit on the length of subject lines; the input +buffer is automatically extended if it is too small. There are replication +features that make it possible to generate long repetitive pattern or subject +lines without having to supply them explicitly. +

+

+An empty line or the end of the file signals the end of the subject lines for a +test, at which point a new pattern or command line is expected if there is +still input to be read. +

+
COMMAND LINES
+

+In between sets of test data, a line that begins with # is interpreted as a +command line. If the first character is followed by white space or an +exclamation mark, the line is treated as a comment, and ignored. Otherwise, the +following commands are recognized: +

+  #forbid_utf
+
+Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP +options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and +the use of (*UTF) and (*UCP) at the start of patterns. This command also forces +an error if a subsequent pattern contains any occurrences of \P, \p, or \X, +which are still supported when PCRE2_UTF is not set, but which require Unicode +property support to be included in the library. +

+

+This is a trigger guard that is used in test files to ensure that UTF or +Unicode property tests are not accidentally added to files that are used when +Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and +PCRE2_NEVER_UCP as a default can also be obtained by the use of #pattern; +the difference is that #forbid_utf cannot be unset, and the automatic +options are not displayed in pattern information, to avoid cluttering up test +output. +

+  #load <filename>
+
+This command is used to load a set of precompiled patterns from a file, as +described in the section entitled "Saving and restoring compiled patterns" +below. +
+  #loadtables <filename>
+
+This command is used to load a set of binary character tables that can be +accessed by the tables=3 qualifier. Such tables can be created by the +pcre2_dftables program with the -b option. +
+  #newline_default [<newline-list>]
+
+When PCRE2 is built, a default newline convention can be specified. This +determines which characters and/or character pairs are recognized as indicating +a newline in a pattern or subject string. The default can be overridden when a +pattern is compiled. The standard test files contain tests of various newline +conventions, but the majority of the tests expect a single linefeed to be +recognized as a newline by default. Without special action the tests would fail +when PCRE2 is compiled with either CR or CRLF as the default newline. +

+

+The #newline_default command specifies a list of newline types that are +acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, +ANY, or NUL (in upper or lower case), for example: +

+  #newline_default LF Any anyCRLF
+
+If the default newline is in the list, this command has no effect. Otherwise, +except when testing the POSIX API, a newline modifier that specifies the +first newline convention in the list (LF in the above example) is added to any +pattern that does not already have a newline modifier. If the newline +list is empty, the feature is turned off. This command is present in a number +of the standard test input files. +

+

+When the POSIX API is being tested there is no way to override the default +newline convention, though it is possible to set the newline convention from +within the pattern. A warning is given if the posix or posix_nosub +modifier is used when #newline_default would set a default for the +non-POSIX API. +

+  #pattern <modifier-list>
+
+This command sets a default modifier list that applies to all subsequent +patterns. Modifiers on a pattern can change these settings. +
+  #perltest
+
+This line is used in test files that can also be processed by perltest.sh +to confirm that Perl gives the same results as PCRE2. Subsequent tests are +checked for the use of pcre2test features that are incompatible with the +perltest.sh script. +

+

+Patterns must use '/' as their delimiter, and only certain modifiers are +supported. Comment lines, #pattern commands, and #subject commands that set or +unset "mark" are recognized and acted on. The #perltest, #forbid_utf, and +#newline_default commands, which are needed in the relevant pcre2test files, +are silently ignored. All other command lines are ignored, but give a warning +message. The #perltest command helps detect tests that are accidentally +put in the wrong file or use the wrong delimiter. For more details of the +perltest.sh script see the comments it contains. +

+  #pop [<modifiers>]
+  #popcopy [<modifiers>]
+
+These commands are used to manipulate the stack of compiled patterns, as +described in the section entitled "Saving and restoring compiled patterns" +below. +
+  #save <filename>
+
+This command is used to save a set of compiled patterns to a file, as described +in the section entitled "Saving and restoring compiled patterns" +below. +
+  #subject <modifier-list>
+
+This command sets a default modifier list that applies to all subsequent +subject lines. Modifiers on a subject line can change these settings. +

+
MODIFIER SYNTAX
+

+Modifier lists are used with both pattern and subject lines. Items in a list +are separated by commas followed by optional white space. Trailing whitespace +in a modifier list is ignored. Some modifiers may be given for both patterns +and subject lines, whereas others are valid only for one or the other. Each +modifier has a long name, for example "anchored", and some of them must be +followed by an equals sign and a value, for example, "offset=12". Values cannot +contain comma characters, but may contain spaces. Modifiers that do not take +values may be preceded by a minus sign to turn off a previous setting. +

+

+A few of the more common modifiers can also be specified as single letters, for +example "i" for "caseless". In documentation, following the Perl convention, +these are written with a slash ("the /i modifier") for clarity. Abbreviated +modifiers must all be concatenated in the first item of a modifier list. If the +first item is not recognized as a long modifier name, it is interpreted as a +sequence of these abbreviations. For example: +

+  /abc/ig,newline=cr,jit=3
+
+This is a pattern line whose modifier list starts with two one-letter modifiers +(/i and /g). The lower-case abbreviated modifiers are the same as used in Perl. +

+
PATTERN SYNTAX
+

+A pattern line must start with one of the following characters (common symbols, +excluding pattern meta-characters): +

+  / ! " ' ` - = _ : ; , % & @ ~
+
+This is interpreted as the pattern's delimiter. A regular expression may be +continued over several input lines, in which case the newline characters are +included within it. It is possible to include the delimiter as a literal within +the pattern by escaping it with a backslash, for example +
+  /abc\/def/
+
+If you do this, the escape and the delimiter form part of the pattern, but +since the delimiters are all non-alphanumeric, the inclusion of the backslash +does not affect the pattern's interpretation. Note, however, that this trick +does not work within \Q...\E literal bracketing because the backslash will +itself be interpreted as a literal. If the terminating delimiter is immediately +followed by a backslash, for example, +
+  /abc/\
+
+a backslash is added to the end of the pattern. This is done to provide a way +of testing the error condition that arises if a pattern finishes with a +backslash, because +
+  /abc\/
+
+is interpreted as the first line of a pattern that starts with "abc/", causing +pcre2test to read the next line as a continuation of the regular expression. +

+

+A pattern can be followed by a modifier list (details below). +

+
SUBJECT LINE SYNTAX
+

+Before each subject line is passed to pcre2_match(), +pcre2_dfa_match(), or pcre2_jit_match(), leading and trailing white +space is removed, and the line is scanned for backslash escapes, unless the +subject_literal modifier was set for the pattern. The following provide a +means of encoding non-printing characters in a visible way: +

+  \a          alarm (BEL, \x07)
+  \b          backspace (\x08)
+  \e          escape (\x1b)
+  \f          form feed (\x0c)
+  \n          newline (\x0a)
+  \N{U+hh...} unicode character (any number of hex digits)
+  \r          carriage return (\x0d)
+  \t          tab (\x09)
+  \v          vertical tab (\x0b)
+  \ddd        octal number (up to 3 octal digits); represent a single
+                code point unless larger than 255 with the 8-bit library
+  \o{dd...}   octal number (any number of octal digits) representing a
+                character in UTF mode or a code point
+  \xhh        hexadecimal byte (up to 2 hex digits)
+  \x{hh...}   hexadecimal number (up to 8 hex digits) representing a
+                character in UTF mode or a code point
+
+Invoking \N{U+hh...} or \x{hh...} doesn't require the use of the utf +modifier on the pattern; these escapes are always recognized. There may be any number of +hexadecimal digits inside the braces; invalid values provoke error messages, +but when \N{U+hh...} is used with some invalid Unicode characters, they are +accepted with a warning instead. +

+

+Note that even in UTF-8 mode, \xhh (and depending on how large, \ddd) +describe one byte rather than one character; this makes it possible to +construct invalid UTF-8 sequences for testing purposes. On the other hand, +\x{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only generating +more than one byte if the value is greater than 127. To avoid the ambiguity +it is preferred to use \N{U+hh...} when describing characters. When testing +the 8-bit library not in UTF-8 mode, \x{hh} generates one byte for values +that fit in one byte, and causes an error for greater values. +

+

+When testing the 16-bit library, not in UTF-16 mode, all 4-digit \x{hhhh} +values are accepted. This makes it possible to construct invalid UTF-16 +sequences for testing purposes. +

+

+When testing the 32-bit library, not in UTF-32 mode, all 4 to 8-digit \x{...} +values are accepted. This makes it possible to construct invalid UTF-32 +sequences for testing purposes. +

+

+There is a special backslash sequence that specifies replication of one or more +characters: +

+  \[<characters>]{<count>}
+
+This makes it possible to test long strings without having to provide them as +part of the file. For example: +
+  \[abc]{4}
+
+is converted to "abcabcabcabc". This feature does not support nesting. To +include a closing square bracket in the characters, code it as \x5D. +

+

+A backslash followed by an equals sign marks the end of the subject string and +the start of a modifier list. For example: +

+  abc\=notbol,notempty
+
+If the subject string is empty and \= is followed by whitespace, the line is +treated as a comment line, and is not used for matching. For example: +
+  \= This is a comment.
+  abc\= This is an invalid modifier list.
+
+A backslash followed by any other non-alphanumeric character just escapes that +character. A backslash followed by anything else causes an error. However, if +the very last character in the line is a backslash (and there is no modifier +list), it is ignored. This gives a way of passing an empty line as data, since +a real empty line terminates the data input. +

+

+If the subject_literal modifier is set for a pattern, all subject lines +that follow are treated as literals, with no special treatment of backslashes. +No replication is possible, and any subject modifiers must be set as defaults +by a #subject command. +

+
PATTERN MODIFIERS
+

+There are several types of modifier that can appear in pattern lines. Except +where noted below, they may also be used in #pattern commands. A +pattern's modifier list can add to or override default modifiers that were set +by a previous #pattern command. +

+
+Setting compilation options +
+

+The following modifiers set options for pcre2_compile(). Most of them set +bits in the options argument of that function, but those whose names start with +PCRE2_EXTRA are additional options that are set in the compile context. +Some of these options have single-letter abbreviations. There is special +handling for /x: if a second x is present, PCRE2_EXTENDED is converted into +PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well, +though this makes no difference to the way pcre2_compile() behaves. See +pcre2api +for a description of the effects of these options. +

+      allow_empty_class         set PCRE2_ALLOW_EMPTY_CLASS
+      allow_lookaround_bsk      set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
+      allow_surrogate_escapes   set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+      alt_bsux                  set PCRE2_ALT_BSUX
+      alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
+      alt_extended_class        set PCRE2_ALT_EXTENDED_CLASS
+      alt_verbnames             set PCRE2_ALT_VERBNAMES
+      anchored                  set PCRE2_ANCHORED
+  /a  ascii_all                 set all ASCII options
+      ascii_bsd                 set PCRE2_EXTRA_ASCII_BSD
+      ascii_bss                 set PCRE2_EXTRA_ASCII_BSS
+      ascii_bsw                 set PCRE2_EXTRA_ASCII_BSW
+      ascii_digit               set PCRE2_EXTRA_ASCII_DIGIT
+      ascii_posix               set PCRE2_EXTRA_ASCII_POSIX
+      auto_callout              set PCRE2_AUTO_CALLOUT
+      bad_escape_is_literal     set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
+  /i  caseless                  set PCRE2_CASELESS
+  /r  caseless_restrict         set PCRE2_EXTRA_CASELESS_RESTRICT
+      dollar_endonly            set PCRE2_DOLLAR_ENDONLY
+  /s  dotall                    set PCRE2_DOTALL
+      dupnames                  set PCRE2_DUPNAMES
+      endanchored               set PCRE2_ENDANCHORED
+      escaped_cr_is_lf          set PCRE2_EXTRA_ESCAPED_CR_IS_LF
+  /x  extended                  set PCRE2_EXTENDED
+  /xx extended_more             set PCRE2_EXTENDED_MORE
+      extra_alt_bsux            set PCRE2_EXTRA_ALT_BSUX
+      firstline                 set PCRE2_FIRSTLINE
+      literal                   set PCRE2_LITERAL
+      match_line                set PCRE2_EXTRA_MATCH_LINE
+      match_invalid_utf         set PCRE2_MATCH_INVALID_UTF
+      match_unset_backref       set PCRE2_MATCH_UNSET_BACKREF
+      match_word                set PCRE2_EXTRA_MATCH_WORD
+  /m  multiline                 set PCRE2_MULTILINE
+      never_backslash_c         set PCRE2_NEVER_BACKSLASH_C
+      never_callout             set PCRE2_EXTRA_NEVER_CALLOUT
+      never_ucp                 set PCRE2_NEVER_UCP
+      never_utf                 set PCRE2_NEVER_UTF
+  /n  no_auto_capture           set PCRE2_NO_AUTO_CAPTURE
+      no_auto_possess           set PCRE2_NO_AUTO_POSSESS
+      no_bs0                    set PCRE2_EXTRA_NO_BS0
+      no_dotstar_anchor         set PCRE2_NO_DOTSTAR_ANCHOR
+      no_start_optimize         set PCRE2_NO_START_OPTIMIZE
+      no_utf_check              set PCRE2_NO_UTF_CHECK
+      python_octal              set PCRE2_EXTRA_PYTHON_OCTAL
+      turkish_casing            set PCRE2_EXTRA_TURKISH_CASING
+      ucp                       set PCRE2_UCP
+      ungreedy                  set PCRE2_UNGREEDY
+      use_offset_limit          set PCRE2_USE_OFFSET_LIMIT
+      utf                       set PCRE2_UTF
+
+As well as turning on the PCRE2_UTF option, the utf modifier causes all +non-printing characters in output strings to be printed using the \x{hh...} +notation. Otherwise, those less than 0x100 are output in hex without the curly +brackets. Setting utf in 16-bit or 32-bit mode also causes pattern and +subject strings to be translated to UTF-16 or UTF-32, respectively, before +being passed to library functions. +
+
+The following modifiers enable or disable performance optimizations by +calling pcre2_set_optimize() before invoking the regex compiler. +
+      optimization_full      enable all optional optimizations
+      optimization_none      disable all optional optimizations
+      auto_possess           auto-possessify variable quantifiers
+      auto_possess_off       don't auto-possessify variable quantifiers
+      dotstar_anchor         anchor patterns starting with .*
+      dotstar_anchor_off     don't anchor patterns starting with .*
+      start_optimize         enable pre-scan of subject string
+      start_optimize_off     disable pre-scan of subject string
+
+See the +pcre2_set_optimize +documentation for details on these optimizations. +

+
+Setting compilation controls +
+

+The following modifiers affect the compilation process or request information +about the pattern. There are single-letter abbreviations for some that are +heavily used in the test files. +

+  /B  bincode                   show binary code without lengths
+      bsr=[anycrlf|unicode]     specify \R handling
+      callout_info              show callout information
+      convert=<options>         request foreign pattern conversion
+      convert_glob_escape=c     set glob escape character
+      convert_glob_separator=c  set glob separator character
+      convert_length            set convert buffer length
+      debug                     same as info,fullbincode
+      expand                    expand repetition syntax in pattern
+      framesize                 show matching frame size
+      fullbincode               show binary code with lengths
+  /I  info                      show info about compiled pattern
+      hex                       unquoted characters are hexadecimal
+      jit[=<number>]            use JIT
+      jitfast                   use JIT fast path
+      jitverify                 verify JIT use
+      locale=<name>             use this locale
+      max_pattern_compiled      ) set maximum compiled pattern
+                 _length=<n>    )   length (bytes)
+      max_pattern_length=<n>    set maximum pattern length (code units)
+      max_varlookbehind=<n>     set maximum variable lookbehind length
+      memory                    show memory used
+      newline=<type>            set newline type
+      null_context              compile with a NULL context
+      null_pattern              pass pattern as NULL
+      parens_nest_limit=<n>     set maximum parentheses depth
+      posix                     use the POSIX API
+      posix_nosub               use the POSIX API with REG_NOSUB
+      push                      push compiled pattern onto the stack
+      pushcopy                  push a copy onto the stack
+      pushtablescopy            push a copy with tables onto the stack
+      stackguard=<number>       test the stackguard feature
+      subject_literal           treat all subject lines as literal
+      tables=[0|1|2|3]          select internal tables
+      use_length                do not zero-terminate the pattern
+      utf8_input                treat input as UTF-8
+
+The effects of these modifiers are described in the following sections. +

+
+Newline and \R handling +
+

+The bsr modifier specifies what \R in a pattern should match. If it is +set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to "unicode", +\R matches any Unicode newline sequence. The default can be specified when +PCRE2 is built; if it is not, the default is set to Unicode. +

+

+The newline modifier specifies which characters are to be interpreted as +newlines, both in the pattern and in subject lines. The type must be one of CR, +LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). +

+
+Information about a pattern +
+

+The debug modifier is a shorthand for info,fullbincode, requesting +all available information. +

+

+The bincode modifier causes a representation of the compiled code to be +output after compilation. This information does not contain length and offset +values, which ensures that the same output is generated for different internal +link sizes and different code unit widths. By using bincode, the same +regression tests can be used in different environments. +

+

+The fullbincode modifier, by contrast, does include length and +offset values. This is used in a few special tests that run only for specific +code unit widths and link sizes, and is also useful for one-off tests. +

+

+The info modifier requests information about the compiled pattern +(whether it is anchored, has a fixed first character, and so on). The +information is obtained from the pcre2_pattern_info() function. Here are +some typical examples: +

+    re> /(?i)(^a|^b)/m,info
+  Capture group count = 1
+  Compile options: multiline
+  Overall options: caseless multiline
+  First code unit at start or follows newline
+  Subject length lower bound = 1
+
+    re> /(?i)abc/info
+  Capture group count = 0
+  Compile options: <none>
+  Overall options: caseless
+  First code unit = 'a' (caseless)
+  Last code unit = 'c' (caseless)
+  Subject length lower bound = 3
+
+"Compile options" are those specified by modifiers; "overall options" have +added options that are taken or deduced from the pattern. If both sets of +options are the same, just a single "options" line is output; if there are no +options, the line is omitted. "First code unit" is where any match must start; +if there is more than one they are listed as "starting code units". "Last code +unit" is the last literal code unit that must be present in any match. This is +not necessarily the last character. These lines are omitted if no starting or +ending code units are recorded. The subject length line is omitted when +no_start_optimize is set because the minimum length is not calculated +when it can never be used. +

+

+The framesize modifier shows the size, in bytes, of each storage frame +used by pcre2_match() for handling backtracking. The size depends on the +number of capturing parentheses in the pattern. A vector of these frames is +used at matching time; its overall size is shown when the heapframes_size +subject modifier is set. +

+

+The callout_info modifier requests information about all the callouts in +the pattern. A list of them is output at the end of any other information that +is requested. For each callout, either its number or string is given, followed +by the item that follows it in the pattern. +

+
+Passing a NULL context +
+

+Normally, pcre2test passes a context block to pcre2_compile(). If +the null_context modifier is set, however, NULL is passed. This is for +testing that pcre2_compile() behaves correctly in this case (it uses +default values). +

+
+Passing a NULL pattern +
+

+The null_pattern modifier is for testing the behaviour of +pcre2_compile() when the pattern argument is NULL. The length value +passed is the default PCRE2_ZERO_TERMINATED unless use_length is set. +Any length other than zero causes an error. +

+
+Specifying pattern characters in hexadecimal +
+

+The hex modifier specifies that the characters of the pattern, except for +substrings enclosed in single or double quotes, are to be interpreted as pairs +of hexadecimal digits. This feature is provided as a way of creating patterns +that contain binary zeros and other non-printing characters. White space is +permitted between pairs of digits. For example, this pattern contains three +characters: +

+  /ab 32 59/hex
+
+Parts of such a pattern are taken literally if quoted. This pattern contains +nine characters, only two of which are specified in hexadecimal: +
+  /ab "literal" 32/hex
+
+Either single or double quotes may be used. There is no way of including +the delimiter within a substring. The hex and expand modifiers are +mutually exclusive. +

+
+Specifying the pattern's length +
+

+By default, patterns are passed to the compiling functions as zero-terminated +strings but can be passed by length instead of being zero-terminated. The +use_length modifier causes this to happen. Using a length happens +automatically (whether or not use_length is set) when hex is set, +because patterns specified in hexadecimal may contain binary zeros. +

+

+If hex or use_length is used with the POSIX wrapper API (see +"Using the POSIX wrapper API" +below), the REG_PEND extension is used to pass the pattern's length. +

+
+Specifying a maximum for variable lookbehinds +
+

+Variable lookbehind assertions are supported only if, for each one, there is a +maximum length (in characters) that it can match. There is a limit on this, +whose default can be set at build time, with an ultimate default of 255. The +max_varlookbehind modifier uses the pcre2_set_max_varlookbehind() +function to change the limit. Lookbehinds whose branches each match a fixed +length are limited to 65535 characters per branch. +

+
+Specifying wide characters in 16-bit and 32-bit modes +
+

+In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and +translated to UTF-16 or UTF-32 when the utf modifier is set. For testing +the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input modifier +can be used. It is mutually exclusive with utf. Input lines are +interpreted as UTF-8 as a means of specifying wide characters. More details are +given in +"Input encoding" +above. +

+
+Generating long repetitive patterns +
+

+Some tests use long patterns that are very repetitive. Instead of creating a +very long input line for such a pattern, you can use a special repetition +feature, similar to the one described for subject lines above. If the +expand modifier is present on a pattern, parts of the pattern that have +the form +

+  \[<characters>]{<count>}
+
+are expanded before the pattern is passed to pcre2_compile(). For +example, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction +cannot be nested. An initial "\[" sequence is recognized only if "]{" followed +by decimal digits and "}" is found later in the pattern. If not, the characters +remain in the pattern unaltered. The expand and hex modifiers are +mutually exclusive. +

+

+If part of an expanded pattern looks like an expansion, but is really part of +the actual pattern, unwanted expansion can be avoided by giving two values in +the quantifier. For example, \[AB]{6000,6000} is not recognized as an +expansion item. +

+

+If the info modifier is set on an expanded pattern, the result of the +expansion is included in the information that is output. +

+
+JIT compilation +
+

+Just-in-time (JIT) compiling is a heavyweight optimization that can greatly +speed up pattern matching. See the +pcre2jit +documentation for details. JIT compiling happens, optionally, after a pattern +has been successfully compiled into an internal form. The JIT compiler converts +this to optimized machine code. It needs to know whether the match-time options +PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, because +different code is generated for the different cases. See the partial +modifier in "Subject Modifiers" +below +for details of how these options are specified for each match attempt. +

+

+JIT compilation is requested by the jit pattern modifier, which may +optionally be followed by an equals sign and a number in the range 0 to 7. +The three bits that make up the number specify which of the three JIT operating +modes are to be compiled: +

+  1  compile JIT code for non-partial matching
+  2  compile JIT code for soft partial matching
+  4  compile JIT code for hard partial matching
+
+The possible values for the jit modifier are therefore: +
+  0  disable JIT
+  1  normal matching only
+  2  soft partial matching only
+  3  normal and soft partial matching
+  4  hard partial matching only
+  6  soft and hard partial matching only
+  7  all three modes
+
+If no number is given, 7 is assumed. The phrase "partial matching" means a call +to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the +PCRE2_PARTIAL_HARD option set. Note that such a call may return a complete +match; the options enable the possibility of a partial match, but do not +require it. Note also that if you request JIT compilation only for partial +matching (for example, jit=2) but do not set the partial modifier on a +subject line, that match will not use JIT code because none was compiled for +non-partial matching. +

+

+If JIT compilation is successful, the compiled JIT code will automatically be +used when an appropriate type of match is run, except when incompatible +run-time options are specified. For more details, see the +pcre2jit +documentation. See also the jitstack modifier below for a way of +setting the size of the JIT stack. +

+

+If the jitfast modifier is specified, matching is done using the JIT +"fast path" interface, pcre2_jit_match(), which skips some of the sanity +checks that are done by pcre2_match(), and of course does not work when +JIT is not supported. If jitfast is specified without jit, jit=7 is +assumed. +

+

+If the jitverify modifier is specified, information about the compiled +pattern shows whether JIT compilation was or was not successful. If +jitverify is specified without jit, jit=7 is assumed. If JIT +compilation is successful when jitverify is set, the text "(JIT)" is +added to the first output line after a match or non match when JIT-compiled +code was actually used in the match. +

+
+Setting a locale +
+

+The locale modifier must specify the name of a locale, for example: +

+  /pattern/locale=fr_FR
+
+The given locale is set, pcre2_maketables() is called to build a set of +character tables for the locale, and this is then passed to +pcre2_compile() when compiling the regular expression. The same tables +are used when matching the following subject lines. The locale modifier +applies only to the pattern on which it appears, but can be given in a +#pattern command if a default is needed. Setting a locale and alternate +character tables are mutually exclusive. +

+
+Showing pattern memory +
+

+The memory modifier causes the size in bytes of the memory used to hold +the compiled pattern to be output. This does not include the size of the +pcre2_code block; it is just the actual compiled data. If the pattern is +subsequently passed to the JIT compiler, the size of the JIT compiled code is +also output. Here is an example: +

+    re> /a(b)c/jit,memory
+  Memory allocation (code space): 21
+  Memory allocation (JIT code): 1910
+
+
+

+
+Limiting nested parentheses +
+

+The parens_nest_limit modifier sets a limit on the depth of nested +parentheses in a pattern. Breaching the limit causes a compilation error. +The default for the library is set when PCRE2 is built, but pcre2test +sets its own default of 220, which is required for running the standard test +suite. +

+
+Limiting the pattern length +
+

+The max_pattern_length modifier sets a limit, in code units, to the +length of pattern that pcre2_compile() will accept. Breaching the limit +causes a compilation error. The default is the largest number a PCRE2_SIZE +variable can hold (essentially unlimited). +

+
+Limiting the size of a compiled pattern +
+

+The max_pattern_compiled_length modifier sets a limit, in bytes, to the +amount of memory used by a compiled pattern. Breaching the limit causes a +compilation error. The default is the largest number a PCRE2_SIZE variable can +hold (essentially unlimited). +

+
+Using the POSIX wrapper API +
+

+The posix and posix_nosub modifiers cause pcre2test to call +PCRE2 via the POSIX wrapper API rather than its native API. When +posix_nosub is used, the POSIX option REG_NOSUB is passed to +regcomp(). The POSIX wrapper supports only the 8-bit library. Note that +it does not imply POSIX matching semantics; for more detail see the +pcre2posix +documentation. The following pattern modifiers set options for the +regcomp() function: +

+  caseless           REG_ICASE
+  multiline          REG_NEWLINE
+  dotall             REG_DOTALL     )
+  ungreedy           REG_UNGREEDY   ) These options are not part of
+  ucp                REG_UCP        )   the POSIX standard
+  utf                REG_UTF8       )
+
+The regerror_buffsize modifier specifies a size for the error buffer that +is passed to regerror() in the event of a compilation error. For example: +
+  /abc/posix,regerror_buffsize=20
+
+This provides a means of testing the behaviour of regerror() when the +buffer is too small for the error message. If this modifier has not been set, a +large buffer is used. +

+

+The aftertext and allaftertext subject modifiers work as described +below. All other modifiers are either ignored, with a warning message, or cause +an error. +

+

+The pattern is passed to regcomp() as a zero-terminated string by +default, but if the use_length or hex modifiers are set, the +REG_PEND extension is used to pass it by length. +

+
+Testing the stack guard feature +
+

+The stackguard modifier is used to test the use of +pcre2_set_compile_recursion_guard(), a function that is provided to +enable stack availability to be checked during compilation (see the +pcre2api +documentation for details). If the number specified by the modifier is greater +than zero, pcre2_set_compile_recursion_guard() is called to set up a +callback from pcre2_compile() to a local function. The argument it +receives is the current nesting parenthesis depth; if this is greater than the +value given by the modifier, non-zero is returned, causing the compilation to +be aborted. +

+
+Using alternative character tables +
+

+The value specified for the tables modifier must be one of the digits 0, +1, 2, or 3. It causes a specific set of built-in character tables to be passed +to pcre2_compile(). This is used in the PCRE2 tests to check behaviour +with different character tables. The digit specifies the tables as follows: +

+  0   do not pass any special character tables
+  1   the default ASCII tables, as distributed in
+        pcre2_chartables.c.dist
+  2   a set of tables defining ISO 8859 characters
+  3   a set of tables loaded by the #loadtables command
+
+In tables 2, some characters whose codes are greater than 128 are identified as +letters, digits, spaces, etc. Tables 3 can be used only after a +#loadtables command has loaded them from a binary file. Setting alternate +character tables and a locale are mutually exclusive. +

+
+Setting certain match controls +
+

+The following modifiers are really subject modifiers, and are described under +"Subject Modifiers" below. However, they may be included in a pattern's +modifier list, in which case they are applied to every subject line that is +processed with that pattern. These modifiers do not affect the compilation +process. +

+      aftertext                   show text after match
+      allaftertext                show text after captures
+      allcaptures                 show all captures
+      allvector                   show the entire ovector
+      allusedtext                 show all consulted text
+      altglobal                   alternative global matching
+  /g  global                      global matching
+      heapframes_size             show match data heapframes size
+      jitstack=<n>                set size of JIT stack
+      mark                        show mark values
+      replace=<string>            specify a replacement string
+      startchar                   show starting character when relevant
+      substitute_callout          use substitution callouts
+      substitute_case_callout     use substitution case callouts
+      substitute_extended         use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal          use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched          use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length  use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=<n>         skip substitution <n>
+      substitute_stop=<n>         skip substitution <n> and following
+      substitute_unknown_unset    use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty      use PCRE2_SUBSTITUTE_UNSET_EMPTY
+
+These modifiers may not appear in a #pattern command. If you want them as +defaults, set them in a #subject command. +

+
+Specifying literal subject lines +
+

+If the subject_literal modifier is present on a pattern, all the subject +lines that it matches are taken as literal strings, with no interpretation of +backslashes. It is not possible to set subject modifiers on such lines, but any +that are set as defaults by a #subject command are recognized. +

+
+Saving a compiled pattern +
+

+When a pattern with the push modifier is successfully compiled, it is +pushed onto a stack of compiled patterns, and pcre2test expects the next +line to contain a new pattern (or a command) instead of a subject line. This +facility is used when saving compiled patterns to a file, as described in the +section entitled "Saving and restoring compiled patterns" +below. +If pushcopy is used instead of push, a copy of the compiled +pattern is stacked, leaving the original as current, ready to match the +following input lines. This provides a way of testing the +pcre2_code_copy() function. +The push and pushcopy modifiers are incompatible with compilation +modifiers such as global that act at match time. Any that are specified +are ignored (for the stacked copy), with a warning message, except for +replace, which causes an error. Note that jitverify, which is +allowed, does not carry through to any subsequent matching that uses a stacked +pattern. +

+
+Testing foreign pattern conversion +
+

+The experimental foreign pattern conversion functions in PCRE2 can be tested by +setting the convert modifier. Its argument is a colon-separated list of +options, which set the equivalent option for the pcre2_pattern_convert() +function: +

+  glob                    PCRE2_CONVERT_GLOB
+  glob_no_starstar        PCRE2_CONVERT_GLOB_NO_STARSTAR
+  glob_no_wild_separator  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR
+  posix_basic             PCRE2_CONVERT_POSIX_BASIC
+  posix_extended          PCRE2_CONVERT_POSIX_EXTENDED
+  unset                   Unset all options
+
+The "unset" value is useful for turning off a default that has been set by a +#pattern command. When one of these options is set, the input pattern is +passed to pcre2_pattern_convert(). If the conversion is successful, the +result is reflected in the output and then passed to pcre2_compile(). The +normal utf and no_utf_check options, if set, cause the +PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to +pcre2_pattern_convert(). +

+

+By default, the conversion function is allowed to allocate a buffer for its +output. However, if the convert_length modifier is set to a value greater +than zero, pcre2test passes a buffer of the given length. This makes it +possible to test the length check. +

+

+The convert_glob_escape and convert_glob_separator modifiers can be +used to specify the escape and separator characters for glob processing, +overriding the defaults, which are operating-system dependent. +

+
SUBJECT MODIFIERS
+

+The modifiers that can appear in subject lines and the #subject +command are of two types. +

+
+Setting match options +
+

+The following modifiers set options for pcre2_match() or +pcre2_dfa_match(). See +pcre2api +for a description of their effects. +

+      anchored                   set PCRE2_ANCHORED
+      copy_matched_subject       set PCRE2_COPY_MATCHED_SUBJECT
+      endanchored                set PCRE2_ENDANCHORED
+      dfa_restart                set PCRE2_DFA_RESTART
+      dfa_shortest               set PCRE2_DFA_SHORTEST
+      disable_recurseloop_check  set PCRE2_DISABLE_RECURSELOOP_CHECK
+      no_jit                     set PCRE2_NO_JIT
+      no_utf_check               set PCRE2_NO_UTF_CHECK
+      notbol                     set PCRE2_NOTBOL
+      notempty                   set PCRE2_NOTEMPTY
+      notempty_atstart           set PCRE2_NOTEMPTY_ATSTART
+      noteol                     set PCRE2_NOTEOL
+      partial_hard (or ph)       set PCRE2_PARTIAL_HARD
+      partial_soft (or ps)       set PCRE2_PARTIAL_SOFT
+
+The partial matching modifiers are provided with abbreviations because they +appear frequently in tests. +

+

+If the posix or posix_nosub modifier was present on the pattern, +causing the POSIX wrapper API to be used, the only option-setting modifiers +that have any effect are notbol, notempty, and noteol, +causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to +regexec(). The other modifiers are ignored, with a warning message. +

+

+There is one additional modifier that can be used with the POSIX wrapper. It is +ignored (with a warning) if used for non-POSIX matching. +

+      posix_startend=<n>[:<m>]
+
+This causes the subject string to be passed to regexec() using the +REG_STARTEND option, which uses offsets to specify which part of the string is +searched. If only one number is given, the end offset is passed as the end of +the subject string. For more detail of REG_STARTEND, see the +pcre2posix +documentation. If the subject string contains binary zeros (coded as escapes +such as \x{00} because pcre2test does not support actual binary zeros in +its input), you must use posix_startend to specify its length. +

+
+Setting match controls +
+

+The following modifiers affect the matching process or request additional +information. Some of them may also be specified on a pattern line (see above), +in which case they apply to every subject line that is matched against that +pattern, but can be overridden by modifiers on the subject. +

+      aftertext                  show text after match
+      allaftertext               show text after captures
+      allcaptures                show all captures
+      allusedtext                show all consulted text (non-JIT only)
+      allvector                  show the entire ovector
+      altglobal                  alternative global matching
+      callout_capture            show captures at callout time
+      callout_data=<n>           set a value to pass via callouts
+      callout_error=<n>[:<m>]    control callout error
+      callout_extra              show extra callout information
+      callout_fail=<n>[:<m>]     control callout failure
+      callout_no_where           do not show position of a callout
+      callout_none               do not supply a callout function
+      copy=<number or name>      copy captured substring
+      depth_limit=<n>            set a depth limit
+      dfa                        use pcre2_dfa_match()
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
+      get=<number or name>       extract captured substring
+      getall                     extract all captured substrings
+  /g  global                     global matching
+      heapframes_size            show match data heapframes size
+      heap_limit=<n>             set a limit on heap memory (Kbytes)
+      jitstack=<n>               set size of JIT stack
+      mark                       show mark values
+      match_limit=<n>            set a match limit
+      memory                     show heap memory usage
+      null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
+      offset=<n>                 set starting offset
+      offset_limit=<n>           set offset limit
+      ovector=<n>                set size of output vector
+      recursion_limit=<n>        obsolete synonym for depth_limit
+      replace=<string>           specify a replacement string
+      startchar                  show startchar when relevant
+      startoffset=<n>            same as offset=<n>
+      substitute_callout         use substitution callouts
+      substitute_case_callout    use substitution case callouts
+      substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
+      substitute_literal         use PCRE2_SUBSTITUTE_LITERAL
+      substitute_matched         use PCRE2_SUBSTITUTE_MATCHED
+      substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+      substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+      substitute_skip=<n>        skip substitution number n
+      substitute_stop=<n>        skip substitution number n and greater
+      substitute_unknown_unset   use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+      substitute_unset_empty     use PCRE2_SUBSTITUTE_UNSET_EMPTY
+      zero_terminate             pass the subject as zero-terminated
+
+The effects of these modifiers are described in the following sections. When +matching via the POSIX wrapper API, the aftertext, allaftertext, +and ovector subject modifiers work as described below. All other +modifiers are either ignored, with a warning message, or cause an error. +

+
+Showing more text +
+

+The aftertext modifier requests that as well as outputting the part of +the subject string that matched the entire pattern, pcre2test should in +addition output the remainder of the subject string. This is useful for tests +where the subject contains multiple copies of the same substring. The +allaftertext modifier requests the same action for captured substrings as +well as the main matched substring. In each case the remainder is output on the +following line with a plus character following the capture number. +

+

+The allusedtext modifier requests that all the text that was consulted +during a successful pattern match by the interpreter should be shown, for both +full and partial matches. This feature is not supported for JIT matching, and +if requested with JIT it is ignored (with a warning message). Setting this +modifier affects the output if there is a lookbehind at the start of a match, +or, for a complete match, a lookahead at the end, or if \K is used in the +pattern. Characters that precede or follow the start and end of the actual +match are indicated in the output by '<' or '>' characters underneath them. +Here is an example: +

+    re> /(?<=pqr)abc(?=xyz)/
+  data> 123pqrabcxyz456\=allusedtext
+   0: pqrabcxyz
+      <<<   >>>
+  data> 123pqrabcxy\=ph,allusedtext
+  Partial match: pqrabcxy
+                 <<<
+
+The first, complete match shows that the matched string is "abc", with the +preceding and following strings "pqr" and "xyz" having been consulted during +the match (when processing the assertions). The partial match can indicate only +the preceding string. +

+

+The startchar modifier requests that the starting character for the match +be indicated, if it is different to the start of the matched string. The only +time when this occurs is when \K has been processed as part of the match. In +this situation, the output for the matched string is displayed from the +starting character instead of from the match point, with circumflex characters +under the earlier characters. For example: +

+    re> /abc\Kxyz/
+  data> abcxyz\=startchar
+   0: abcxyz
+      ^^^
+
+Unlike allusedtext, the startchar modifier can be used with JIT. +However, these two modifiers are mutually exclusive. +

+
+Showing the value of all capture groups +
+

+The allcaptures modifier requests that the values of all potential +captured parentheses be output after a match. By default, only those up to the +highest one actually used in the match are output (corresponding to the return +code from pcre2_match()). Groups that did not take part in the match +are output as "<unset>". This modifier is not relevant for DFA matching (which +does no capturing) and does not apply when replace is specified; it is +ignored, with a warning message, if present. +

+
+Showing the entire ovector, for all outcomes +
+

+The allvector modifier requests that the entire ovector be shown, +whatever the outcome of the match. Compare allcaptures, which shows only +up to the maximum number of capture groups for the pattern, and then only for a +successful complete non-DFA match. This modifier, which acts after any match +result, and also for DFA matching, provides a means of checking that there are +no unexpected modifications to ovector fields. Before each match attempt, the +ovector is filled with a special value, and if this is found in both elements +of a capturing pair, "<unchanged>" is output. After a successful match, this +applies to all groups after the maximum capture group for the pattern. In other +cases it applies to the entire ovector. After a partial match, the first two +elements are the only ones that should be set. After a DFA match, the amount of +ovector that is used depends on the number of matches that were found. +

+
+Testing pattern callouts +
+

+A callout function is supplied when pcre2test calls the library matching +functions, unless callout_none is specified. Its behaviour can be +controlled by various modifiers listed above whose names begin with +callout_. Details are given in the section entitled "Callouts" +below. +Testing callouts from pcre2_substitute() is described separately in +"Testing the substitution function" +below. +

+
+Finding all matches in a string +
+

+Searching for all possible matches within a subject can be requested by the +global or altglobal modifier. After finding a match, the matching +function is called again to search the remainder of the subject. The difference +between global and altglobal is that the former uses the +start_offset argument to pcre2_match() or pcre2_dfa_match() +to start searching at a new point within the entire string (which is what Perl +does), whereas the latter passes over a shortened subject. This makes a +difference to the matching process if the pattern begins with a lookbehind +assertion (including \b or \B). +

+

+If an empty string is matched, the next match is done with the +PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for +another, non-empty, match at the same point in the subject. If this match +fails, the start offset is advanced, and the normal match is retried. This +imitates the way Perl handles such cases when using the /g modifier or +the split() function. Normally, the start offset is advanced by one +character, but if the newline convention recognizes CRLF as a newline, and the +current character is CR followed by LF, an advance of two characters occurs. +

+
+Testing substring extraction functions +
+

+The copy and get modifiers can be used to test the +pcre2_substring_copy_xxx() and pcre2_substring_get_xxx() functions. +They can be given more than once, and each can specify a capture group name or +number, for example: +

+   abcd\=copy=1,copy=3,get=G1
+
+If the #subject command is used to set default copy and/or get lists, +these can be unset by specifying a negative number to cancel all numbered +groups and an empty name to cancel all named groups. +

+

+The getall modifier tests pcre2_substring_list_get(), which +extracts all captured substrings. +

+

+If the subject line is successfully matched, the substrings extracted by the +convenience functions are output with C, G, or L after the string number +instead of a colon. This is in addition to the normal full list. The string +length (that is, the return from the extraction function) is given in +parentheses after each substring, followed by the name when the extraction was +by name. +

+
+Testing the substitution function +
+

+If the replace modifier is set, the pcre2_substitute() function is +called instead of one of the matching functions (or after one call of +pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). Note that +replacement strings cannot contain commas, because a comma signifies the end of +a modifier. This is not thought to be an issue in a test program. +

+

+Specifying a completely empty replacement string disables this modifier. +However, it is possible to specify an empty replacement by providing a buffer +length, as described below, for an otherwise empty replacement. +

+

+Unlike subject strings, pcre2test does not process replacement strings +for escape sequences. In UTF mode, a replacement string is checked to see if it +is a valid UTF-8 string. If so, it is correctly converted to a UTF string of +the appropriate code unit width. If it is not a valid UTF-8 string, the +individual code units are copied directly. This provides a means of passing an +invalid UTF-8 string for testing purposes. +

+

+The following modifiers set options (in addition to the normal match options) +for pcre2_substitute(): +

+  global                      PCRE2_SUBSTITUTE_GLOBAL
+  substitute_extended         PCRE2_SUBSTITUTE_EXTENDED
+  substitute_literal          PCRE2_SUBSTITUTE_LITERAL
+  substitute_matched          PCRE2_SUBSTITUTE_MATCHED
+  substitute_overflow_length  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
+  substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
+  substitute_unknown_unset    PCRE2_SUBSTITUTE_UNKNOWN_UNSET
+  substitute_unset_empty      PCRE2_SUBSTITUTE_UNSET_EMPTY
+
+See the +pcre2api +documentation for details of these options. +

+

+After a successful substitution, the modified string is output, preceded by the +number of replacements. This may be zero if there were no matches. Here is a +simple example of a substitution test: +

+  /abc/replace=xxx
+      =abc=abc=
+   1: =xxx=abc=
+      =abc=abc=\=global
+   2: =xxx=xxx=
+
+Subject and replacement strings should be kept relatively short (fewer than 256 +characters) for substitution tests, as fixed-size buffers are used. To make it +easy to test for buffer overflow, if the replacement string starts with a +number in square brackets, that number is passed to pcre2_substitute() as +the size of the output buffer, with the replacement string starting at the next +character. Here is an example that tests the edge case: +
+  /abc/
+      123abc123\=replace=[10]XYZ
+   1: 123XYZ123
+      123abc123\=replace=[9]XYZ
+  Failed: error -47: no more memory
+
+The default action of pcre2_substitute() is to return +PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the +substitute_overflow_length modifier), pcre2_substitute() continues +to go through the motions of matching and substituting (but not doing any +callouts), in order to compute the size of buffer that is required. When this +happens, pcre2test shows the required buffer length (which includes space +for the trailing zero) as part of the error message. For example: +
+  /abc/substitute_overflow_length
+      123abc123\=replace=[9]XYZ
+  Failed: error -47: no more memory: 10 code units are needed
+
+A replacement string is ignored with POSIX and DFA matching. Specifying partial +matching provokes an error return ("bad option value") from +pcre2_substitute(). +

+
+Testing substitute callouts +
+

+If the substitute_callout modifier is set, a substitution callout +function is set up. The null_context modifier must not be set, because +the address of the callout function is passed in a match context. When the +callout function is called (after each substitution), details of the input +and output strings are output. For example: +

+  /abc/g,replace=<$0>,substitute_callout
+      abcdefabcpqr
+   1(1) Old 0 3 "abc" New 0 5 "<abc>"
+   2(1) Old 6 9 "abc" New 8 13 "<abc>"
+   2: <abc>def<abc>pqr
+
+The first number on each callout line is the count of matches. The +parenthesized number is the number of pairs that are set in the ovector (that +is, one more than the number of capturing groups that were set). Then are +listed the offsets of the old substring, its contents, and the same for the +replacement. +

+

+By default, the substitution callout function returns zero, which accepts the +replacement and causes matching to continue if /g was used. Two further +modifiers can be used to test other return values. If substitute_skip is +set to a value greater than zero the callout function returns +1 for the match +of that number, and similarly substitute_stop returns -1. These cause the +replacement to be rejected, and -1 causes no further matching to take place. If +either of them is set, substitute_callout is assumed. For example: +

+  /abc/g,replace=<$0>,substitute_skip=1
+      abcdefabcpqr
+   1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED"
+   2(1) Old 6 9 "abc" New 6 11 "<abc>"
+   2: abcdef<abc>pqr
+      abcdefabcpqr\=substitute_stop=1
+   1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
+   1: abcdefabcpqr
+
+If both are set for the same number, stop takes precedence. Only a single skip +or stop is supported, which is sufficient for testing that the feature works. +

+
+Testing substitute case callouts +
+

+If the substitute_case_callout modifier is set, a substitution +case callout function is set up. The callout function is called for each +substituted chunk which is to be case-transformed. +

+

+The callout function passed is a fixed function with implementation for certain +behaviours: inputs which shrink when case-transformed; inputs which grow; inputs +with distinct upper/lower/titlecase forms. The characters which are not +special-cased for testing purposes are left unmodified, as if they are caseless +characters. +

+
+Setting the JIT stack size +
+

+The jitstack modifier provides a way of setting the maximum stack size +that is used by the just-in-time optimization code. It is ignored if JIT +optimization is not being used. The value is a number of kibibytes (units of +1024 bytes). Setting zero reverts to the default of 32KiB. Providing a stack +that is larger than the default is necessary only for very complicated +patterns. If jitstack is set non-zero on a subject line it overrides any +value that was set on the pattern. +

+
+Setting heap, match, and depth limits +
+

+The heap_limit, match_limit, and depth_limit modifiers set +the appropriate limits in the match context. These values are ignored when the +find_limits or find_limits_noheap modifier is specified. +

+
+Finding minimum limits +
+

+If the find_limits modifier is present on a subject line, pcre2test +calls the relevant matching function several times, setting different values in +the match context via pcre2_set_heap_limit(), +pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds +the smallest value for each parameter that allows the match to complete without +a "limit exceeded" error. The match itself may succeed or fail. An alternative +modifier, find_limits_noheap, omits the heap limit. This is used in the +standard tests, because the minimum heap limit varies between systems. If JIT +is being used, only the match limit is relevant, and the other two are +automatically omitted. +

+

+When using this modifier, the pattern should not contain any limit settings +such as (*LIMIT_MATCH=...) within it. If such a setting is present and is +lower than the minimum matching value, the minimum value cannot be found +because pcre2_set_match_limit() etc. are only able to reduce the value of +an in-pattern limit; they cannot increase it. +

+

+For non-DFA matching, the minimum depth_limit number is a measure of how +much nested backtracking happens (that is, how deeply the pattern's tree is +searched). In the case of DFA matching, depth_limit controls the depth of +recursive calls of the internal function that is used for handling pattern +recursion, lookaround assertions, and atomic groups. +

+

+For non-DFA matching, the match_limit number is a measure of the amount +of backtracking that takes place, and learning the minimum value can be +instructive. For most simple matches, the number is quite small, but for +patterns with very large numbers of matching possibilities, it can become large +very quickly with increasing length of subject string. In the case of DFA +matching, match_limit controls the total number of calls, both recursive +and non-recursive, to the internal matching function, thus controlling the +overall amount of computing resource that is used. +

+

+For both kinds of matching, the heap_limit number, which is in kibibytes +(units of 1024 bytes), limits the amount of heap memory used for matching. +

+
+Showing MARK names +
+

+The mark modifier causes the names from backtracking control verbs that +are returned from calls to pcre2_match() to be displayed. If a mark is +returned for a match, non-match, or partial match, pcre2test shows it. +For a match, it is on a line by itself, tagged with "MK:". Otherwise, it +is added to the non-match message. +

+
+Showing memory usage +
+

+The memory modifier causes pcre2test to log the sizes of all heap +memory allocation and freeing calls that occur during a call to +pcre2_match() or pcre2_dfa_match(). In the latter case, heap memory +is used only when a match requires more internal workspace than the default +allocation on the stack, so in many cases there will be no output. No heap +memory is allocated during matching with JIT. For this modifier to work, the +null_context modifier must not be set on both the pattern and the +subject, though it can be set on one or the other. +

+
+Showing the heap frame overall vector size +
+

+The heapframes_size modifier is relevant for matches using +pcre2_match() without JIT. After a match has run (whether successful or +not) the size, in bytes, of the allocated heap frames vector that is left +attached to the match data block is shown. If the matching action involved +several calls to pcre2_match() (for example, global matching or for +timing) only the final value is shown. +

+

+This modifier is ignored, with a warning, for POSIX or DFA matching. JIT +matching does not use the heap frames vector, so the size is always zero, +unless there was a previous non-JIT match. Note that specifying a size of zero +for the output vector (see below) causes pcre2test to free its match data +block (and associated heap frames vector) and allocate a new one. +

+
+Setting a starting offset +
+

+The offset modifier sets an offset in the subject string at which +matching starts. Its value is a number of code units, not characters. +

+
+Setting an offset limit +
+

+The offset_limit modifier sets a limit for unanchored matches. If a match +cannot be found starting at or before this offset in the subject, a "no match" +return is given. The data value is a number of code units, not characters. When +this modifier is used, the use_offset_limit modifier must have been set +for the pattern; if not, an error is generated. +

+
+Setting the size of the output vector +
+

+The ovector modifier applies only to the subject line in which it +appears, though of course it can also be used to set a default in a +#subject command. It specifies the number of pairs of offsets that are +available for storing matching information. The default is 15. +

+

+A value of zero is useful when testing the POSIX API because it causes +regexec() to be called with a NULL capture vector. When not testing the +POSIX API, a value of zero is used to cause +pcre2_match_data_create_from_pattern() to be called, in order to create a +new match block of exactly the right size for the pattern. (It is not possible +to create a match block with a zero-length ovector; there is always at least +one pair of offsets.) The old match data block is freed. +

+
+Passing the subject as zero-terminated +
+

+By default, the subject string is passed to a native API matching function with +its correct length. In order to test the facility for passing a zero-terminated +string, the zero_terminate modifier is provided. It causes the length to +be passed as PCRE2_ZERO_TERMINATED. When matching via the POSIX interface, +this modifier is ignored, with a warning. +

+

+When testing pcre2_substitute(), this modifier also has the effect of +passing the replacement string as zero-terminated. +

+
+Passing a NULL context, subject, or replacement +
+

+Normally, pcre2test passes a context block to pcre2_match(), +pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). +If the null_context modifier is set, however, NULL is passed. This is for +testing that the matching and substitution functions behave correctly in this +case (they use default values). This modifier cannot be used with the +find_limits, find_limits_noheap, or substitute_callout +modifiers. +

+

+Similarly, for testing purposes, if the null_subject or +null_replacement modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. +

+
THE ALTERNATIVE MATCHING FUNCTION
+

+By default, pcre2test uses the standard PCRE2 matching function, +pcre2_match(), to match each subject line. PCRE2 also supports an +alternative matching function, pcre2_dfa_match(), which operates in a +different way, and has some restrictions. The differences between the two +functions are described in the +pcre2matching +documentation. +

+

+If the dfa modifier is set, the alternative matching function is used. +This function finds all possible matches at a given point in the subject. If, +however, the dfa_shortest modifier is set, processing stops after the +first match is found. This is always the shortest possible match. +

+
DEFAULT OUTPUT FROM pcre2test
+

+This section describes the output when the normal matching function, +pcre2_match(), is being used. +

+

+When a match succeeds, pcre2test outputs the list of captured substrings, +starting with number 0 for the string that matched the whole pattern. +Otherwise, it outputs "No match" when the return is PCRE2_ERROR_NOMATCH, or +"Partial match:" followed by the partially matching substring when the +return is PCRE2_ERROR_PARTIAL. (Note that this is the +entire substring that was inspected during the partial match; it may include +characters before the actual match start if a lookbehind assertion, \K, \b, +or \B was involved.) +

+

+For any other return, pcre2test outputs the PCRE2 negative error number +and a short descriptive phrase. If the error is a failed UTF string check, the +code unit offset of the start of the failing character is also output. Here is +an example of an interactive pcre2test run. +

+  $ pcre2test
+  PCRE2 version 10.22 2016-07-29
+
+    re> /^abc(\d+)/
+  data> abc123
+   0: abc123
+   1: 123
+  data> xyz
+  No match
+
+Unset capturing substrings that are not followed by one that is set are not +shown by pcre2test unless the allcaptures modifier is specified. In +the following example, there are two capturing substrings, but when the first +data line is matched, the second, unset substring is not shown. An "internal" +unset substring is shown as "<unset>", as for the second data line. +
+    re> /(a)|(b)/
+  data> a
+   0: a
+   1: a
+  data> b
+   0: b
+   1: <unset>
+   2: b
+
+If the strings contain any non-printing characters, they are output as \xhh +escapes if the value is less than 256 and UTF mode is not set. Otherwise they +are output as \x{hh...} escapes. See below for the definition of non-printing +characters. If the aftertext modifier is set, the output for substring 0 +is followed by the rest of the subject string, identified by "0+" like this: +
+    re> /cat/aftertext
+  data> cataract
+   0: cat
+   0+ aract
+
+If global matching is requested, the results of successive matching attempts +are output in sequence, like this: +
+    re> /\Bi(\w\w)/g
+  data> Mississippi
+   0: iss
+   1: ss
+   0: iss
+   1: ss
+   0: ipp
+   1: pp
+
+"No match" is output only if the first match attempt fails. Here is an example +of a failure message (the offset 4 that is specified by the offset +modifier is past the end of the subject string): +
+    re> /xyz/
+  data> xyz\=offset=4
+  Error -24 (bad offset value)
+
+

+

+Note that whereas patterns can be continued over several lines (a plain ">" +prompt is used for continuations), subject lines may not. However newlines can +be included in a subject by means of the \n escape (or \r, \r\n, etc., +depending on the newline sequence setting). +

+
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
+

+When the alternative matching function, pcre2_dfa_match(), is used, the +output consists of a list of all the matches that start at the first point in +the subject where there is at least one match. For example: +

+    re> /(tang|tangerine|tan)/
+  data> yellow tangerine\=dfa
+   0: tangerine
+   1: tang
+   2: tan
+
+Using the normal matching function on this data finds only "tang". The +longest matching string is always given first (and numbered zero). After a +PCRE2_ERROR_PARTIAL return, the output is "Partial match:", followed by the +partially matching substring. Note that this is the entire substring that was +inspected during the partial match; it may include characters before the actual +match start if a lookbehind assertion, \b, or \B was involved. (\K is not +supported for DFA matching.) +

+

+If global matching is requested, the search for further matches resumes +at the end of the longest match. For example: +

+    re> /(tang|tangerine|tan)/g
+  data> yellow tangerine and tangy sultana\=dfa
+   0: tangerine
+   1: tang
+   2: tan
+   0: tang
+   1: tan
+   0: tan
+
+The alternative matching function does not support substring capture, so the +modifiers that are concerned with captured substrings are not relevant. +

+
RESTARTING AFTER A PARTIAL MATCH
+

+When the alternative matching function has given the PCRE2_ERROR_PARTIAL +return, indicating that the subject partially matched the pattern, you can +restart the match with additional subject data by means of the +dfa_restart modifier. For example: +

+    re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
+  data> 23ja\=ps,dfa
+  Partial match: 23ja
+  data> n05\=dfa,dfa_restart
+   0: n05
+
+For further information about partial matching, see the +pcre2partial +documentation. +

+
CALLOUTS
+

+If the pattern contains any callout requests, pcre2test's callout +function is called during matching unless callout_none is specified. This +works with both matching functions, and with JIT, though there are some +differences in behaviour. The output for callouts with numerical arguments and +those with string arguments is slightly different. +

+
+Callouts with numerical arguments +
+

+By default, the callout function displays the callout number, the start and +current positions in the subject text at the callout time, and the next pattern +item to be tested. For example: +

+  --->pqrabcdef
+    0    ^  ^     \d
+
+This output indicates that callout number 0 occurred for a match attempt +starting at the fourth character of the subject string, when the pointer was at +the seventh character, and when the next pattern item was \d. Just +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. +

+

+Callouts numbered 255 are assumed to be automatic callouts, inserted as a +result of the auto_callout pattern modifier. In this case, instead of +showing the callout number, the offset in the pattern, preceded by a plus, is +output. For example: +

+    re> /\d?[A-E]\*/auto_callout
+  data> E*
+  --->E*
+   +0 ^      \d?
+   +3 ^      [A-E]
+   +8 ^^     \*
+  +10 ^ ^
+   0: E*
+
+If a pattern contains (*MARK) items, an additional line is output whenever +a change of latest mark is passed to the callout function. For example: +
+    re> /a(*MARK:X)bc/auto_callout
+  data> abc
+  --->abc
+   +0 ^       a
+   +1 ^^      (*MARK:X)
+  +10 ^^      b
+  Latest Mark: X
+  +11 ^ ^     c
+  +12 ^  ^
+   0: abc
+
+The mark changes between matching "a" and "b", but stays the same for the rest +of the match, so nothing more is output. If, as a result of backtracking, the +mark reverts to being unset, the text "<unset>" is output. +

+
+Callouts with string arguments +
+

+The output for a callout with a string argument is similar, except that instead +of outputting a callout number before the position indicators, the callout +string and its offset in the pattern string are output before the reflection of +the subject string, and the subject string is reflected for each callout. For +example: +

+    re> /^ab(?C'first')cd(?C"second")ef/
+  data> abcdefg
+  Callout (7): 'first'
+  --->abcdefg
+      ^ ^         c
+  Callout (20): "second"
+  --->abcdefg
+      ^   ^       e
+   0: abcdef
+
+
+

+
+Callout modifiers +
+

+The callout function in pcre2test returns zero (carry on matching) by +default, but you can use a callout_fail modifier in a subject line to +change this and other parameters of the callout (see below). +

+

+If the callout_capture modifier is set, the current captured groups are +output when a callout occurs. This is useful only for non-DFA matching, as +pcre2_dfa_match() does not support capturing, so no captures are ever +shown. +

+

+The normal callout output, showing the callout number or pattern offset (as +described above) is suppressed if the callout_no_where modifier is set. +

+

+When using the interpretive matching function pcre2_match() without JIT, +setting the callout_extra modifier causes additional output from +pcre2test's callout function to be generated. For the first callout in a +match attempt at a new starting position in the subject, "New match attempt" is +output. If there has been a backtrack since the last callout (or start of +matching if this is the first callout), "Backtrack" is output, followed by "No +other matching paths" if the backtrack ended the previous match attempt. For +example: +

+   re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess
+  data> aac\=callout_extra
+  New match attempt
+  --->aac
+   +0 ^       (
+   +1 ^       a+
+   +3 ^ ^     )
+   +4 ^ ^     b
+  Backtrack
+  --->aac
+   +3 ^^      )
+   +4 ^^      b
+  Backtrack
+  No other matching paths
+  New match attempt
+  --->aac
+   +0  ^      (
+   +1  ^      a+
+   +3  ^^     )
+   +4  ^^     b
+  Backtrack
+  No other matching paths
+  New match attempt
+  --->aac
+   +0   ^     (
+   +1   ^     a+
+  Backtrack
+  No other matching paths
+  New match attempt
+  --->aac
+   +0    ^    (
+   +1    ^    a+
+  No match
+
+Notice that various optimizations must be turned off if you want all possible +matching paths to be scanned. If no_start_optimize is not used, there is +an immediate "no match", without any callouts, because the starting +optimization fails to find "b" in the subject, which it knows must be present +for any match. If no_auto_possess is not used, the "a+" item is turned +into "a++", which reduces the number of backtracks. +

+

+The callout_extra modifier has no effect if used with the DFA matching +function, or with JIT. +

+
+Return values from callouts +
+

+The default return from the callout function is zero, which allows matching to +continue. The callout_fail modifier can be given one or two numbers. If +there is only one number, 1 is returned instead of 0 (causing matching to +backtrack) when a callout of that number is reached. If two numbers (<n>:<m>) +are given, 1 is returned when callout <n> is reached and there have been at +least <m> callouts. The callout_error modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +callout_error takes precedence. Note that callouts with string arguments +are always given the number zero. +

+

+The callout_data modifier can be given an unsigned or a negative number. +This is set as the "user data" that is passed to the matching function, and +passed back when the callout function is invoked. Any value other than zero is +used as a return from pcre2test's callout function. +

+

+Inserting callouts can be helpful when using pcre2test to check +complicated regular expressions. For further information about callouts, see +the +pcre2callout +documentation. +

+
NON-PRINTING CHARACTERS
+

+When pcre2test is outputting text in the compiled version of a pattern, +bytes other than 32-126 are always treated as non-printing characters and are +therefore shown as hex escapes. +

+

+When pcre2test is outputting text that is a matched part of a subject +string, it behaves in the same way, unless a different locale has been set for +the pattern (using the locale modifier). In this case, the +isprint() function is used to distinguish printing and non-printing +characters. +

+
SAVING AND RESTORING COMPILED PATTERNS
+

+It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. JIT data cannot be saved. The host +on which the patterns are reloaded must be running the same version of PCRE2, +with the same code unit width, and must also have the same endianness, pointer +width and PCRE2_SIZE type. Before compiled patterns can be saved they must be +serialized, that is, converted to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). +

+

+The functions whose names begin with pcre2_serialize_ are used +for serializing and de-serializing. They are described in the +pcre2serialize +documentation. In this section we describe the features of pcre2test that +can be used to test these functions. +

+

+Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET. It just makes a reloadable byte code stream. +Hence the restrictions on reloading mentioned above. +

+

+In pcre2test, when a pattern with the push modifier is successfully +compiled, it is pushed onto a stack of compiled patterns, and pcre2test +expects the next line to contain a new pattern (or command) instead of a +subject line. By contrast, the pushcopy modifier causes a copy of the +compiled pattern to be stacked, leaving the original available for immediate +matching. By using push and/or pushcopy, a number of patterns can +be compiled and retained. These modifiers are incompatible with posix, +and control modifiers that act at match time are ignored (with a message) for +the stacked patterns. The jitverify modifier applies only at compile +time. +

+

+The command +

+  #save <filename>
+
+causes all the stacked patterns to be serialized and the result written to the +named file. Afterwards, all the stacked patterns are freed. The command +
+  #load <filename>
+
+reads the data in the file, and then arranges for it to be de-serialized, with +the resulting compiled patterns added to the pattern stack. The pattern on the +top of the stack can be retrieved by the #pop command, which must be followed +by lines of subjects that are to be matched with the pattern, terminated as +usual by an empty line or end of file. This command may be followed by a +modifier list containing only +control modifiers +that act after a pattern has been compiled. In particular, hex, +posix, posix_nosub, push, and pushcopy are not allowed, +nor are any +option-setting modifiers. +The JIT modifiers are, however, permitted. Here is an example that saves and +reloads two patterns. +
+  /abc/push
+  /xyz/push
+  #save tempfile
+  #load tempfile
+  #pop info
+  xyz
+
+  #pop jit,bincode
+  abc
+
+If jitverify is used with #pop, it does not automatically imply +jit, which is different behaviour from when it is used on a pattern. +

+

+The #popcopy command is analogous to the pushcopy modifier in that it +makes current a copy of the topmost stack pattern, leaving the original still +on the stack. +

+
SEE ALSO
+

+pcre2(3), pcre2api(3), pcre2callout(3), +pcre2jit(3), pcre2matching(3), pcre2partial(3), +pcre2pattern(3), pcre2serialize(3). +

+
AUTHOR
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
REVISION
+

+Last updated: 26 December 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/html/pcre2unicode.html b/3rd/pcre2/doc/html/pcre2unicode.html new file mode 100644 index 00000000..5b425329 --- /dev/null +++ b/3rd/pcre2/doc/html/pcre2unicode.html @@ -0,0 +1,556 @@ + + +pcre2unicode specification + + +

pcre2unicode man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+
+UNICODE AND UTF SUPPORT +
+

+PCRE2 is normally built with Unicode support, though if you do not need it, you +can build it without, in which case the library will be smaller. With Unicode +support, PCRE2 has knowledge of Unicode character properties and can process +strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit +width), but this is not the default. Unless specifically requested, PCRE2 +treats each code unit in a string as one character. +

+

+There are two ways of telling PCRE2 to switch to UTF mode, where characters may +consist of more than one code unit and the range of values is constrained. The +program can call +pcre2_compile() +with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF). +However, the latter facility can be locked out by the PCRE2_NEVER_UTF option. +That is, the programmer can prevent the supplier of the pattern from switching +to UTF mode. +

+

+Note that the PCRE2_MATCH_INVALID_UTF option (see +below) +forces PCRE2_UTF to be set. +

+

+In UTF mode, both the pattern and any subject strings that are matched against +it are treated as UTF strings instead of strings of individual one-code-unit +characters. There are also some other changes to the way characters are +handled, as documented below. +

+
+UNICODE PROPERTY SUPPORT +
+

+When PCRE2 is built with Unicode support, the escape sequences \p{..}, +\P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting. +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the derived properties +Any and Lc (synonym L&), the Unicode script names such as Arabic or Han, +Bidi_Class, Bidi_Control, and a few binary properties. +

+

+The full lists are given in the +pcre2pattern +and +pcre2syntax +documentation. In general, only the short names for properties are supported. +For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not +supported. Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. +

+
+WIDE CHARACTERS AND UTF MODES +
+

+Code points less than 256 can be specified in patterns by either braced or +unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger +values have to use braced sequences. Unbraced octal code points up to \777 are +also recognized; larger ones can be coded using \o{...}. +

+

+The escape sequence \N{U+<hex digits>} is recognized as another way of +specifying a Unicode character by code point in a UTF mode. It is not allowed +in non-UTF mode. +

+

+In UTF mode, repeat quantifiers apply to complete UTF characters, not to +individual code units. +

+

+In UTF mode, the dot metacharacter matches one UTF character instead of a +single code unit. +

+

+In UTF mode, capture group names are not restricted to ASCII, and may contain +any Unicode letters and decimal digits, as well as underscore. +

+

+The escape sequence \C can be used to match a single code unit in UTF mode, +but its use can lead to some strange effects because it breaks up multi-unit +characters (see the description of \C in the +pcre2pattern +documentation). For this reason, there is a build-time option that disables +support for \C completely. There is also a less draconian compile-time option +for locking out the use of \C when a pattern is compiled. +

+

+The use of \C is not supported by the alternative matching function +pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a character +may consist of more than one code unit. The use of \C in these modes provokes +a match-time error. Also, the JIT optimization does not support \C in these +modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that +contains \C, it will not succeed, and so when pcre2_match() is called, +the matching will be carried out by the interpretive function. +

+

+The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test +characters of any code value, but, by default, the characters that PCRE2 +recognizes as digits, spaces, or word characters remain the same set as in +non-UTF mode, all with code points less than 256. This remains true even when +PCRE2 is built to include Unicode support, because to do otherwise would slow +down matching in many common cases. Note that this also applies to \b +and \B, because they are defined in terms of \w and \W. If you want +to test for a wider sense of, say, "digit", you can use explicit Unicode +property tests such as \p{Nd}. Alternatively, if you set the PCRE2_UCP option, +the way that the character escapes work is changed so that Unicode properties +are used to determine which characters match, though there are some options +that suppress this for individual escapes. For details see the section on +generic character types +in the +pcre2pattern +documentation. +

+

+Like the escapes, characters that match the POSIX named character classes are +all low-valued characters unless the PCRE2_UCP option is set, but there is an +option to override this. +

+

+In contrast to the character escapes and character classes, the special +horizontal and vertical white space escapes (\h, \H, \v, and \V) do match +all the appropriate Unicode characters, whether or not PCRE2_UCP is set. +

+
+UNICODE CASE-EQUIVALENCE +
+

+If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use +of Unicode properties except for characters whose code points are less than 128 +and that have at most two case-equivalent values. For these, a direct table +lookup is used for speed. A few Unicode characters such as Greek sigma have +more than two code points that are case-equivalent, and these are treated +specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case +processing for non-UTF character encodings such as UCS-2. +

+

+There are two ASCII characters (S and K) that, in addition to their ASCII lower +case equivalents, have a non-ASCII one as well (long S and Kelvin sign). +Recognition of these non-ASCII characters as case-equivalent to their ASCII +counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT +option. When this is set, all characters in a case equivalence must either be +ASCII or non-ASCII; there can be no mixing. +

+    Without PCRE2_EXTRA_CASELESS_RESTRICT:
+      'k' = 'K' = U+212A (Kelvin sign)
+      's' = 'S' = U+017F (long S)
+    With PCRE2_EXTRA_CASELESS_RESTRICT:
+      'k' = 'K'
+      U+212A (Kelvin sign)  only case-equivalent to itself
+      's' = 'S'
+      U+017F (long S)       only case-equivalent to itself
+
+

+

+One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +

+    Without PCRE2_EXTRA_TURKISH_CASING:
+      'i' = 'I'
+      U+0130 (capital I with dot above)  only case-equivalent to itself
+      U+0131 (small dotless i)           only case-equivalent to itself
+    With PCRE2_EXTRA_TURKISH_CASING:
+      'i' = U+0130 (capital I with dot above)
+      U+0131 (small dotless i) = 'I'
+
+

+

+It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and +PCRE2_EXTRA_TURKISH_CASING together. +

+

+From release 10.45 the Unicode letter properties Lu (upper case), Ll (lower +case), and Lt (title case) are all treated as Lc (cased letter) when caseless +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. +

+
+SCRIPT RUNS +
+

+The pattern constructs (*script_run:...) and (*atomic_script_run:...), with +synonyms (*sr:...) and (*asr:...), verify that the string matched within the +parentheses is a script run. In concept, a script run is a sequence of +characters that are all from the same Unicode script. However, because some +scripts are commonly used together, and because some diacritical and other +marks are used with multiple scripts, it is not that simple. +

+

+Every Unicode character has a Script property, mostly with a value +corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There +are also three special values: +

+

+"Unknown" is used for code points that have not been assigned, and also for the +surrogate code points. In the PCRE2 32-bit library, characters whose code +points are greater than the Unicode maximum (U+10FFFF), which are accessible +only in non-UTF mode, are assigned the Unknown script. +

+

+"Common" is used for characters that are used with many scripts. These include +punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII +digits 0 to 9. +

+

+"Inherited" is used for characters such as diacritical marks that modify a +previous character. These are considered to take on the script of the character +that they modify. +

+

+Some Inherited characters are used with many scripts, but many of them are only +normally used with a small number of scripts. For example, U+102E0 (Coptic +Epact thousands mark) is used only with Arabic and Coptic. In order to make it +possible to check this, a Unicode property called Script Extension exists. Its +value is a list of scripts that apply to the character. For the majority of +characters, the list contains just one script, the same one as the Script +property. However, for characters such as U+102E0 more than one Script is +listed. There are also some Common characters that have a single, non-Common +script in their Script Extension list. +

+

+The next section describes the basic rules for deciding whether a given string +of characters is a script run. Note, however, that there are some special cases +involving the Chinese Han script, and an additional constraint for decimal +digits. These are covered in subsequent sections. +

+
+Basic script run rules +
+

+A string that is less than two characters long is a script run. This is the +only case in which an Unknown character can be part of a script run. Longer +strings are checked using only the Script Extensions property, not the basic +Script property. +

+

+If a character's Script Extension property is the single value "Inherited", it +is always accepted as part of a script run. This is also true for the property +"Common", subject to the checking of decimal digits described below. All the +remaining characters in a script run must have at least one script in common in +their Script Extension lists. In set-theoretic terminology, the intersection of +all the sets of scripts must not be empty. +

+

+A simple example is an Internet name such as "google.com". The letters are all +in the Latin script, and the dot is Common, so this string is a script run. +However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a +string that looks the same, but with Cyrillic "o"s is not a script run. +

+

+More interesting examples involve characters with more than one script in their +Script Extension. Consider the following characters: +

+  U+060C  Arabic comma
+  U+06D4  Arabic full stop
+
+The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and +Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could +appear in script runs of either Arabic or Hanifi Rohingya. The first could also +appear in Syriac or Thaana script runs, but the second could not. +

+
+The Chinese Han script +
+

+The Chinese Han script is commonly used in conjunction with other scripts for +writing certain languages. Japanese uses the Hiragana and Katakana scripts +together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo +and Han. These three combinations are treated as special cases when checking +script runs and are, in effect, "virtual scripts". Thus, a script run may +contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and +Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of +Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical +Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/) +in allowing such mixtures. +

+
+Decimal digits +
+

+Unicode contains many sets of 10 decimal digits in different scripts, and some +scripts (including the Common script) contain more than one set. Some of these +decimal digits are visually indistinguishable from the common ASCII +digits. In addition to the script checking described above, if a script run +contains any decimal digits, they must all come from the same set of 10 +adjacent characters. +

+
+VALIDITY OF UTF STRINGS +
+

+When the PCRE2_UTF option is set, the strings passed as patterns and subjects +are (by default) checked for validity on entry to the relevant functions. If an +invalid UTF string is passed, a negative error code is returned. The code unit +offset to the offending character can be extracted from the match data block by +calling pcre2_get_startchar(), which is used for this purpose after a UTF +error. +

+

+In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance, for +example in the case of a long subject string that is being scanned repeatedly. +If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time, +PCRE2 assumes that the pattern or subject it is given (respectively) contains +only valid UTF code unit sequences. +

+

+If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result +is undefined and your program may crash or loop indefinitely or give incorrect +results. There is, however, one mode of matching that can handle invalid UTF +subject strings. This is enabled by passing PCRE2_MATCH_INVALID_UTF to +pcre2_compile() and is discussed below in the next section. The rest of +this section covers the case when PCRE2_MATCH_INVALID_UTF is not set. +

+

+Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the UTF check +for the pattern; it does not also apply to subject strings. If you want to +disable the check for a subject string you must pass this same option to +pcre2_match() or pcre2_dfa_match(). +

+

+UTF-16 and UTF-32 strings can indicate their endianness by a special code known +as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting +strings to be in host byte order. +

+

+Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other +processing takes place. In the case of pcre2_match() and +pcre2_dfa_match() calls with a non-zero starting offset, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \b and \B are +one-character lookbehinds. +

+

+In addition to checking the format of the string, there is a check to ensure +that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate +area. The so-called "non-character" code points are not excluded because +Unicode corrigendum #9 makes it clear that they should not be. +

+

+Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, +where they are used in pairs to encode code points with values greater than +0xFFFF. The code points that are encoded by UTF-16 pairs are available +independently in the UTF-8 and UTF-32 encodings. (In other words, the whole +surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and +UTF-32.) +

+

+Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is +given if an escape sequence for an invalid Unicode code point is encountered in +the pattern. If you want to allow escape sequences such as \x{d800} (a +surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option. However, this is possible only in UTF-8 and UTF-32 modes, because these +values are not representable in UTF-16. +

+
+Errors in UTF-8 strings +
+

+The following negative error codes are given for invalid UTF-8 strings: +

+  PCRE2_ERROR_UTF8_ERR1
+  PCRE2_ERROR_UTF8_ERR2
+  PCRE2_ERROR_UTF8_ERR3
+  PCRE2_ERROR_UTF8_ERR4
+  PCRE2_ERROR_UTF8_ERR5
+
+The string ends with a truncated UTF-8 character; the code specifies how many +bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be +no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279) +allows for up to 6 bytes, and this is checked first; hence the possibility of +4 or 5 missing bytes. +
+  PCRE2_ERROR_UTF8_ERR6
+  PCRE2_ERROR_UTF8_ERR7
+  PCRE2_ERROR_UTF8_ERR8
+  PCRE2_ERROR_UTF8_ERR9
+  PCRE2_ERROR_UTF8_ERR10
+
+The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the +character do not have the binary value 0b10 (that is, either the most +significant bit is 0, or the next bit is 1). +
+  PCRE2_ERROR_UTF8_ERR11
+  PCRE2_ERROR_UTF8_ERR12
+
+A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long; +these code points are excluded by RFC 3629. +
+  PCRE2_ERROR_UTF8_ERR13
+
+A 4-byte character has a value greater than 0x10ffff; these code points are +excluded by RFC 3629. +
+  PCRE2_ERROR_UTF8_ERR14
+
+A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of +code points is reserved by RFC 3629 for use with UTF-16, and so is excluded +from UTF-8. +
+  PCRE2_ERROR_UTF8_ERR15
+  PCRE2_ERROR_UTF8_ERR16
+  PCRE2_ERROR_UTF8_ERR17
+  PCRE2_ERROR_UTF8_ERR18
+  PCRE2_ERROR_UTF8_ERR19
+
+A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a +value that can be represented by fewer bytes, which is invalid. For example, +the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just +one byte. +
+  PCRE2_ERROR_UTF8_ERR20
+
+The two most significant bits of the first byte of a character have the binary +value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a +byte can only validly occur as the second or subsequent byte of a multi-byte +character. +
+  PCRE2_ERROR_UTF8_ERR21
+
+The first byte of a character has the value 0xfe or 0xff. These values can +never occur in a valid UTF-8 string. +

+
+Errors in UTF-16 strings +
+

+The following negative error codes are given for invalid UTF-16 strings: +

+  PCRE2_ERROR_UTF16_ERR1  Missing low surrogate at end of string
+  PCRE2_ERROR_UTF16_ERR2  Invalid low surrogate follows high surrogate
+  PCRE2_ERROR_UTF16_ERR3  Isolated low surrogate
+
+
+

+
+Errors in UTF-32 strings +
+

+The following negative error codes are given for invalid UTF-32 strings: +

+  PCRE2_ERROR_UTF32_ERR1  Surrogate character (0xd800 to 0xdfff)
+  PCRE2_ERROR_UTF32_ERR2  Code point is greater than 0x10ffff
+
+
+

+
+MATCHING IN INVALID UTF STRINGS +
+

+You can run pattern matches on subject strings that may contain invalid UTF +sequences if you call pcre2_compile() with the PCRE2_MATCH_INVALID_UTF +option. This is supported by pcre2_match(), including JIT matching, but +not by pcre2_dfa_match(). When PCRE2_MATCH_INVALID_UTF is set, it forces +PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a +valid UTF string. +

+

+If you do not set PCRE2_MATCH_INVALID_UTF when calling pcre2_compile, and +you are not certain that your subject strings are valid UTF sequences, you +should not make use of the JIT "fast path" function pcre2_jit_match() +because it bypasses sanity checks, including the one for UTF validity. An +invalid string may cause undefined behaviour, including looping, crashing, or +giving the wrong answer. +

+

+Setting PCRE2_MATCH_INVALID_UTF does not affect what pcre2_compile() +generates, but if pcre2_jit_compile() is subsequently called, it does +generate different code. If JIT is not used, the option affects the behaviour +of the interpretive code in pcre2_match(). When PCRE2_MATCH_INVALID_UTF +is set at compile time, PCRE2_NO_UTF_CHECK is ignored at match time. +

+

+In this mode, an invalid code unit sequence in the subject never matches any +pattern item. It does not match dot, it does not match \p{Any}, it does not +even match negative items such as [^X]. A lookbehind assertion fails if it +encounters an invalid sequence while moving the current point backwards. In +other words, an invalid UTF code unit sequence acts as a barrier which no match +can cross. +

+

+You can also think of this as the subject being split up into fragments of +valid UTF, delimited internally by invalid code unit sequences. The pattern is +matched fragment by fragment. The result of a successful match, however, is +given as code unit offsets in the entire subject string in the usual way. There +are a few points to consider: +

+

+The internal boundaries are not interpreted as the beginnings or ends of lines +and so do not match circumflex or dollar characters in the pattern. +

+

+If pcre2_match() is called with an offset that points to an invalid +UTF-sequence, that sequence is skipped, and the match starts at the next valid +UTF character, or the end of the subject. +

+

+At internal fragment boundaries, \b and \B behave in the same way as at the +beginning and end of the subject. For example, a sequence such as \bWORD\b +would match an instance of WORD that is surrounded by invalid UTF code units. +

+

+Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary +data, knowing that any matched strings that are returned are valid UTF. This +can be useful when searching for UTF text in executable or other binary files. +

+

+Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. +

+
+AUTHOR +
+

+Philip Hazel +
+Retired from University Computing Service +
+Cambridge, England. +
+

+
+REVISION +
+

+Last updated: 27 November 2024 +
+Copyright © 1997-2024 University of Cambridge. +
+

+Return to the PCRE2 index page. +

diff --git a/3rd/pcre2/doc/index.html.src b/3rd/pcre2/doc/index.html.src new file mode 100644 index 00000000..2d81b678 --- /dev/null +++ b/3rd/pcre2/doc/index.html.src @@ -0,0 +1,327 @@ + + + +PCRE2 specification + + +

Perl-compatible Regular Expressions (revised API: PCRE2)

+

+The HTML documentation for PCRE2 consists of a number of pages that are listed +below in alphabetical order. If you are new to PCRE2, please read the first one +first. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2  Introductory page
pcre2-config  Information about the installation configuration
pcre2api  PCRE2's native API
pcre2build  Building PCRE2
pcre2callout  The callout facility
pcre2compat  Compatibility with Perl
pcre2convert  Experimental foreign pattern conversion functions
pcre2demo  A demonstration C program that uses the PCRE2 library
pcre2grep  The pcre2grep command
pcre2jit  Discussion of the just-in-time optimization support
pcre2limits  Details of size and other limits
pcre2matching  Discussion of the two matching algorithms
pcre2partial  Using PCRE2 for partial matching
pcre2pattern  Specification of the regular expressions supported by PCRE2
pcre2perform  Some comments on performance
pcre2posix  The POSIX API to the PCRE2 8-bit library
pcre2sample  Discussion of the pcre2demo program
pcre2serialize  Serializing functions for saving precompiled patterns
pcre2syntax  Syntax quick-reference summary
pcre2test  The pcre2test command for testing PCRE2
pcre2unicode  Discussion of Unicode and UTF-8/UTF-16/UTF-32 support
+ +

+There are also individual pages that summarize the interface for each function +in the library. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
pcre2_callout_enumerate  Enumerate callouts in a compiled pattern
pcre2_code_copy  Copy a compiled pattern
pcre2_code_copy_with_tables  Copy a compiled pattern and its character tables
pcre2_code_free  Free a compiled pattern
pcre2_compile  Compile a regular expression pattern
pcre2_compile_context_copy  Copy a compile context
pcre2_compile_context_create  Create a compile context
pcre2_compile_context_free  Free a compile context
pcre2_config  Show build-time configuration options
pcre2_convert_context_copy  Copy a convert context
pcre2_convert_context_create  Create a convert context
pcre2_convert_context_free  Free a convert context
pcre2_converted_pattern_free  Free converted foreign pattern
pcre2_dfa_match  Match a compiled pattern to a subject string + (DFA algorithm; not Perl compatible)
pcre2_general_context_copy  Copy a general context
pcre2_general_context_create  Create a general context
pcre2_general_context_free  Free a general context
pcre2_get_error_message  Get textual error message for error number
pcre2_get_mark  Get a (*MARK) name
pcre2_get_match_data_size  Get the size of a match data block
pcre2_get_ovector_count  Get the ovector count
pcre2_get_ovector_pointer  Get a pointer to the ovector
pcre2_get_startchar  Get the starting character offset
pcre2_jit_compile  Process a compiled pattern with the JIT compiler
pcre2_jit_free_unused_memory  Free unused JIT memory
pcre2_jit_match  Fast path interface to JIT matching
pcre2_jit_stack_assign  Assign stack for JIT matching
pcre2_jit_stack_create  Create a stack for JIT matching
pcre2_jit_stack_free  Free a JIT matching stack
pcre2_maketables  Build character tables in current locale
pcre2_maketables_free  Free character tables
pcre2_match  Match a compiled pattern to a subject string + (Perl compatible)
pcre2_match_context_copy  Copy a match context
pcre2_match_context_create  Create a match context
pcre2_match_context_free  Free a match context
pcre2_match_data_create  Create a match data block
pcre2_match_data_create_from_pattern  Create a match data block getting size from pattern
pcre2_match_data_free  Free a match data block
pcre2_pattern_convert  Experimental foreign pattern converter
pcre2_pattern_info  Extract information about a pattern
pcre2_serialize_decode  Decode serialized compiled patterns
pcre2_serialize_encode  Serialize compiled patterns for save/restore
pcre2_serialize_free  Free serialized compiled patterns
pcre2_serialize_get_number_of_codes  Get number of serialized compiled patterns
pcre2_set_bsr  Set \R convention
pcre2_set_callout  Set up a callout function
pcre2_set_character_tables  Set character tables
pcre2_set_compile_extra_options  Set compile time extra options
pcre2_set_compile_recursion_guard  Set up a compile recursion guard function
pcre2_set_depth_limit  Set the match backtracking depth limit
pcre2_set_glob_escape  Set glob escape character
pcre2_set_glob_separator  Set glob separator character
pcre2_set_heap_limit  Set the match backtracking heap limit
pcre2_set_match_limit  Set the match limit
pcre2_set_max_pattern_compiled_length  Set the maximum length of a compiled pattern
pcre2_set_max_pattern_length  Set the maximum length of a pattern
pcre2_set_max_varlookbehind  Set the maximum match length for a variable-length lookbehind
pcre2_set_newline  Set the newline convention
pcre2_set_offset_limit  Set the offset limit
pcre2_set_optimize  Set an optimization directive
pcre2_set_parens_nest_limit  Set the parentheses nesting limit
pcre2_set_recursion_limit  Obsolete: use pcre2_set_depth_limit
pcre2_set_recursion_memory_management  Obsolete function that (from 10.30 onwards) does nothing
pcre2_set_substitute_callout  Set a substitution callout function
pcre2_set_substitute_case_callout  Set a substitution case callout function
pcre2_substitute  Match a compiled pattern to a subject string and do + substitutions
pcre2_substring_copy_byname  Extract named substring into given buffer
pcre2_substring_copy_bynumber  Extract numbered substring into given buffer
pcre2_substring_free  Free extracted substring
pcre2_substring_get_byname  Extract named substring into new memory
pcre2_substring_get_bynumber  Extract numbered substring into new memory
pcre2_substring_length_byname  Find length of named substring
pcre2_substring_length_bynumber  Find length of numbered substring
pcre2_substring_list_free  Free list of extracted substrings
pcre2_substring_list_get  Extract all substrings into new memory
pcre2_substring_nametable_scan  Find table entries for given string name
pcre2_substring_number_from_name  Convert captured string name to number
+ + + diff --git a/3rd/pcre2/doc/pcre2-config.1 b/3rd/pcre2/doc/pcre2-config.1 new file mode 100644 index 00000000..ac139b97 --- /dev/null +++ b/3rd/pcre2/doc/pcre2-config.1 @@ -0,0 +1,86 @@ +.TH PCRE2-CONFIG 1 "28 September 2014" "PCRE2 10.45" +.SH NAME +pcre2-config - program to return PCRE2 configuration +.SH SYNOPSIS +.rs +.sp +.nf +.B pcre2-config [--prefix] [--exec-prefix] [--version] +.B " [--libs8] [--libs16] [--libs32] [--libs-posix]" +.B " [--cflags] [--cflags-posix]" +.fi +. +. +.SH DESCRIPTION +.rs +.sp +\fBpcre2-config\fP returns the configuration of the installed PCRE2 libraries +and the options required to compile a program to use them. Some of the options +apply only to the 8-bit, or 16-bit, or 32-bit libraries, respectively, and are +not available for libraries that have not been built. If an unavailable option +is encountered, the "usage" information is output. +. +. +.SH OPTIONS +.rs +.TP 10 +\fB--prefix\fP +Writes the directory prefix used in the PCRE2 installation for architecture +independent files (\fI/usr\fP on many systems, \fI/usr/local\fP on some +systems) to the standard output. +.TP 10 +\fB--exec-prefix\fP +Writes the directory prefix used in the PCRE2 installation for architecture +dependent files (normally the same as \fB--prefix\fP) to the standard output. +.TP 10 +\fB--version\fP +Writes the version number of the installed PCRE2 libraries to the standard +output. +.TP 10 +\fB--libs8\fP +Writes to the standard output the command line options required to link +with the 8-bit PCRE2 library (\fB-lpcre2-8\fP on many systems). +.TP 10 +\fB--libs16\fP +Writes to the standard output the command line options required to link +with the 16-bit PCRE2 library (\fB-lpcre2-16\fP on many systems). +.TP 10 +\fB--libs32\fP +Writes to the standard output the command line options required to link +with the 32-bit PCRE2 library (\fB-lpcre2-32\fP on many systems). 
+.TP 10 +\fB--libs-posix\fP +Writes to the standard output the command line options required to link with +PCRE2's POSIX API wrapper library (\fB-lpcre2-posix\fP \fB-lpcre2-8\fP on many +systems). +.TP 10 +\fB--cflags\fP +Writes to the standard output the command line options required to compile +files that use PCRE2 (this may include some \fB-I\fP options, but is blank on +many systems). +.TP 10 +\fB--cflags-posix\fP +Writes to the standard output the command line options required to compile +files that use PCRE2's POSIX API wrapper library (this may include some +\fB-I\fP options, but is blank on many systems). +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2(3)\fP +. +. +.SH AUTHOR +.rs +.sp +This manual page was originally written by Mark Baker for the Debian GNU/Linux +system. It has been subsequently revised as a generic PCRE2 man page. +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 28 September 2014 +.fi diff --git a/3rd/pcre2/doc/pcre2-config.txt b/3rd/pcre2/doc/pcre2-config.txt new file mode 100644 index 00000000..c87de589 --- /dev/null +++ b/3rd/pcre2/doc/pcre2-config.txt @@ -0,0 +1,84 @@ +PCRE2-CONFIG(1) General Commands Manual PCRE2-CONFIG(1) + + +NAME + pcre2-config - program to return PCRE2 configuration + + +SYNOPSIS + + pcre2-config [--prefix] [--exec-prefix] [--version] + [--libs8] [--libs16] [--libs32] [--libs-posix] + [--cflags] [--cflags-posix] + + +DESCRIPTION + + pcre2-config returns the configuration of the installed PCRE2 libraries + and the options required to compile a program to use them. Some of the + options apply only to the 8-bit, or 16-bit, or 32-bit libraries, re- + spectively, and are not available for libraries that have not been + built. If an unavailable option is encountered, the "usage" information + is output. + + +OPTIONS + + --prefix Writes the directory prefix used in the PCRE2 installation + for architecture independent files (/usr on many systems, + /usr/local on some systems) to the standard output. 
+ + --exec-prefix + Writes the directory prefix used in the PCRE2 installation + for architecture dependent files (normally the same as --pre- + fix) to the standard output. + + --version Writes the version number of the installed PCRE2 libraries to + the standard output. + + --libs8 Writes to the standard output the command line options re- + quired to link with the 8-bit PCRE2 library (-lpcre2-8 on + many systems). + + --libs16 Writes to the standard output the command line options re- + quired to link with the 16-bit PCRE2 library (-lpcre2-16 on + many systems). + + --libs32 Writes to the standard output the command line options re- + quired to link with the 32-bit PCRE2 library (-lpcre2-32 on + many systems). + + --libs-posix + Writes to the standard output the command line options re- + quired to link with PCRE2's POSIX API wrapper library + (-lpcre2-posix -lpcre2-8 on many systems). + + --cflags Writes to the standard output the command line options re- + quired to compile files that use PCRE2 (this may include some + -I options, but is blank on many systems). + + --cflags-posix + Writes to the standard output the command line options re- + quired to compile files that use PCRE2's POSIX API wrapper + library (this may include some -I options, but is blank on + many systems). + + +SEE ALSO + + pcre2(3) + + +AUTHOR + + This manual page was originally written by Mark Baker for the Debian + GNU/Linux system. It has been subsequently revised as a generic PCRE2 + man page. 
+ + +REVISION + + Last updated: 28 September 2014 + + +PCRE2 10.45 28 September 2014 PCRE2-CONFIG(1) diff --git a/3rd/pcre2/doc/pcre2.3 b/3rd/pcre2/doc/pcre2.3 new file mode 100644 index 00000000..44566c7d --- /dev/null +++ b/3rd/pcre2/doc/pcre2.3 @@ -0,0 +1,211 @@ +.TH PCRE2 3 "18 December 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH INTRODUCTION +.rs +.sp +PCRE2 is the name used for a revised API for the PCRE library, which is a set +of functions, written in C, that implement regular expression pattern matching +using the same syntax and semantics as Perl, with just a few differences. After +nearly two decades, the limitations of the original API were making development +increasingly difficult. The new API is more extensible, and it was simplified +by abolishing the separate "study" optimizing function; in PCRE2, patterns are +automatically optimized where possible. Since forking from PCRE1, the code has +been extensively refactored and new features introduced. The old library is now +obsolete and is no longer maintained. +.P +As well as Perl-style regular expression patterns, some features that appeared +in Python and the original PCRE before they appeared in Perl are available +using the Python syntax. There is also some support for one or two .NET and +Oniguruma syntax items, and there are options for requesting some minor changes +that give better ECMAScript (aka JavaScript) compatibility. +.P +The source code for PCRE2 can be compiled to support strings of 8-bit, 16-bit, +or 32-bit code units, which means that up to three separate libraries may be +installed, one for each code unit size. The size of code unit is not related to +the bit size of the underlying hardware. In a 64-bit environment that also +supports 32-bit applications, versions of PCRE2 that are compiled in both +64-bit and 32-bit modes may be needed. 
+.P +The original work to extend PCRE to 16-bit and 32-bit code units was done by +Zoltan Herczeg and Christian Persch, respectively. In all three cases, strings +can be interpreted either as one character per code unit, or as UTF-encoded +Unicode, with support for Unicode general category properties. Unicode support +is optional at build time (but is the default). However, processing strings as +UTF code units must be enabled explicitly at run time. The version of Unicode +in use can be discovered by running +.sp + pcre2test -C +.P +The three libraries contain identical sets of functions, with names ending in +_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However, +by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just +one code unit width can be written using generic names such as +\fBpcre2_compile()\fP, and the documentation is written assuming that this is +the case. +.P +In addition to the Perl-compatible matching function, PCRE2 contains an +alternative function that matches the same compiled patterns in a different +way. In certain circumstances, the alternative function has some advantages. +For a discussion of the two matching algorithms, see the +.\" HREF +\fBpcre2matching\fP +.\" +page. +.P +Details of exactly which Perl regular expression features are and are not +supported by PCRE2 are given in separate documents. See the +.\" HREF +\fBpcre2pattern\fP +.\" +and +.\" HREF +\fBpcre2compat\fP +.\" +pages. There is a syntax summary in the +.\" HREF +\fBpcre2syntax\fP +.\" +page. +.P +Some features of PCRE2 can be included, excluded, or changed when the library +is built. The +.\" HREF +\fBpcre2_config()\fP +.\" +function makes it possible for a client to discover which features are +available. The features themselves are described in the +.\" HREF +\fBpcre2build\fP +.\" +page. 
Documentation about building PCRE2 for various operating systems can be +found in the +.\" HTML +.\" +\fBREADME\fP +.\" +and +.\" HTML +.\" +\fBNON-AUTOTOOLS_BUILD\fP +.\" +files in the source distribution. +.P +The libraries contain a number of undocumented internal functions and data +tables that are used by more than one of the exported external functions, but +which are not intended for use by external callers. Their names all begin with +"_pcre2", which hopefully will not provoke any name clashes. In some +environments, it is possible to control which external symbols are exported +when a shared library is built, and in these cases the undocumented symbols are +not exported. +. +. +.SH "SECURITY CONSIDERATIONS" +.rs +.sp +If you are using PCRE2 in a non-UTF application that permits users to supply +arbitrary patterns for compilation, you should be aware of a feature that +allows users to turn on UTF support from within a pattern. For example, an +8-bit pattern that begins with "(*UTF)" turns on UTF-8 mode, which interprets +patterns and subjects as strings of UTF-8 code units instead of individual +8-bit characters. This causes both the pattern and any data against which it is +matched to be checked for UTF-8 validity. If the data string is very long, such +a check might use sufficiently many resources as to cause your application to +lose performance. +.P +One way of guarding against this possibility is to use the +\fBpcre2_pattern_info()\fP function to check the compiled pattern's options for +PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when calling +\fBpcre2_compile()\fP. This causes a compile time error if the pattern contains +a UTF-setting sequence. +.P +The use of Unicode properties for character types such as \ed can also be +enabled from within the pattern, by specifying "(*UCP)". This feature can be +disallowed by setting the PCRE2_NEVER_UCP option. 
+.P +If your application is one that supports UTF, be aware that validity checking +can take time. If the same data string is to be matched many times, you can use +the PCRE2_NO_UTF_CHECK option for the second and subsequent matches to avoid +running redundant checks. +.P +The use of the \eC escape sequence in a UTF-8 or UTF-16 pattern can lead to +problems, because it may leave the current matching point in the middle of a +multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C option can be used by an +application to lock out the use of \eC, causing a compile-time error if it is +encountered. It is also possible to build PCRE2 with the use of \eC permanently +disabled. +.P +Another way that performance can be hit is by running a pattern that has a very +large search tree against a string that will never match. Nested unlimited +repeats in a pattern are a common example. PCRE2 provides some protection +against this: see the \fBpcre2_set_match_limit()\fP function in the +.\" HREF +\fBpcre2api\fP +.\" +page. There is a similar function called \fBpcre2_set_depth_limit()\fP that can +be used to restrict the amount of memory that is used. +. +. +.SH "USER DOCUMENTATION" +.rs +.sp +The user documentation for PCRE2 comprises a number of different sections. In +the "man" format, each of these is a separate "man page". In the HTML format, +each is a separate page, linked from the index page. In the plain text format, +the descriptions of the \fBpcre2grep\fP and \fBpcre2test\fP programs are in +files called \fBpcre2grep.txt\fP and \fBpcre2test.txt\fP, respectively. The +remaining sections, except for the \fBpcre2demo\fP section (which is a program +listing), and the short pages for individual functions, are concatenated in +\fBpcre2.txt\fP, for ease of searching. 
The sections are as follows: +.sp + pcre2 this document + pcre2-config show PCRE2 installation configuration information + pcre2api details of PCRE2's native C API + pcre2build building PCRE2 + pcre2callout details of the pattern callout feature + pcre2compat discussion of Perl compatibility + pcre2convert details of pattern conversion functions + pcre2demo a demonstration C program that uses PCRE2 + pcre2grep description of the \fBpcre2grep\fP command (8-bit only) + pcre2jit discussion of just-in-time optimization support + pcre2limits details of size and other limits + pcre2matching discussion of the two matching algorithms + pcre2partial details of the partial matching facility +.\" JOIN + pcre2pattern syntax and semantics of supported regular + expression patterns + pcre2perform discussion of performance issues + pcre2posix the POSIX-compatible C API for the 8-bit library + pcre2sample discussion of the pcre2demo program + pcre2serialize details of pattern serialization + pcre2syntax quick syntax reference + pcre2test description of the \fBpcre2test\fP command + pcre2unicode discussion of Unicode and UTF support +.sp +In the "man" and HTML formats, there is also a short page for each C library +function, listing its arguments and results. +. +. +.SH AUTHORS +.rs +.sp +The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Herczeg. +.P +PCRE2 was written by Philip Hazel, of the University Computing Service, +Cambridge, England. Many others have also contributed. +.P +To contact the maintainers, please use the GitHub issues tracker or PCRE2 +mailing list, as described at the project page: +.\" HTML +.\" +https://github.com/PCRE2Project/pcre2 +.\" +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 18 December 2024 +Copyright (c) 1997-2021 University of Cambridge. 
+.fi diff --git a/3rd/pcre2/doc/pcre2.txt b/3rd/pcre2/doc/pcre2.txt new file mode 100644 index 00000000..38e86d6e --- /dev/null +++ b/3rd/pcre2/doc/pcre2.txt @@ -0,0 +1,12696 @@ +----------------------------------------------------------------------------- +This file contains a concatenation of the PCRE2 man pages, converted to plain +text format for ease of searching with a text editor, or for use on systems +that do not have a man page processor. The small individual files that give +synopses of each function in the library have not been included. Neither has +the pcre2demo program. There are separate text files for the pcre2grep and +pcre2test commands. +----------------------------------------------------------------------------- + + +PCRE2(3) Library Functions Manual PCRE2(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +INTRODUCTION + + PCRE2 is the name used for a revised API for the PCRE library, which is + a set of functions, written in C, that implement regular expression + pattern matching using the same syntax and semantics as Perl, with just + a few differences. After nearly two decades, the limitations of the + original API were making development increasingly difficult. The new + API is more extensible, and it was simplified by abolishing the sepa- + rate "study" optimizing function; in PCRE2, patterns are automatically + optimized where possible. Since forking from PCRE1, the code has been + extensively refactored and new features introduced. The old library is + now obsolete and is no longer maintained. + + As well as Perl-style regular expression patterns, some features that + appeared in Python and the original PCRE before they appeared in Perl + are available using the Python syntax. There is also some support for + one or two .NET and Oniguruma syntax items, and there are options for + requesting some minor changes that give better ECMAScript (aka + JavaScript) compatibility. 
+ + The source code for PCRE2 can be compiled to support strings of 8-bit, + 16-bit, or 32-bit code units, which means that up to three separate li- + braries may be installed, one for each code unit size. The size of code + unit is not related to the bit size of the underlying hardware. In a + 64-bit environment that also supports 32-bit applications, versions of + PCRE2 that are compiled in both 64-bit and 32-bit modes may be needed. + + The original work to extend PCRE to 16-bit and 32-bit code units was + done by Zoltan Herczeg and Christian Persch, respectively. In all three + cases, strings can be interpreted either as one character per code + unit, or as UTF-encoded Unicode, with support for Unicode general cate- + gory properties. Unicode support is optional at build time (but is the + default). However, processing strings as UTF code units must be enabled + explicitly at run time. The version of Unicode in use can be discovered + by running + + pcre2test -C + + The three libraries contain identical sets of functions, with names + ending in _8, _16, or _32, respectively (for example, pcre2_com- + pile_8()). However, by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or + 32, a program that uses just one code unit width can be written using + generic names such as pcre2_compile(), and the documentation is written + assuming that this is the case. + + In addition to the Perl-compatible matching function, PCRE2 contains an + alternative function that matches the same compiled patterns in a dif- + ferent way. In certain circumstances, the alternative function has some + advantages. For a discussion of the two matching algorithms, see the + pcre2matching page. + + Details of exactly which Perl regular expression features are and are + not supported by PCRE2 are given in separate documents. See the + pcre2pattern and pcre2compat pages. There is a syntax summary in the + pcre2syntax page. 
+ + Some features of PCRE2 can be included, excluded, or changed when the + library is built. The pcre2_config() function makes it possible for a + client to discover which features are available. The features them- + selves are described in the pcre2build page. Documentation about build- + ing PCRE2 for various operating systems can be found in the README and + NON-AUTOTOOLS_BUILD files in the source distribution. + + The libraries contain a number of undocumented internal functions and + data tables that are used by more than one of the exported external + functions, but which are not intended for use by external callers. + Their names all begin with "_pcre2", which hopefully will not provoke + any name clashes. In some environments, it is possible to control which + external symbols are exported when a shared library is built, and in + these cases the undocumented symbols are not exported. + + +SECURITY CONSIDERATIONS + + If you are using PCRE2 in a non-UTF application that permits users to + supply arbitrary patterns for compilation, you should be aware of a + feature that allows users to turn on UTF support from within a pattern. + For example, an 8-bit pattern that begins with "(*UTF)" turns on UTF-8 + mode, which interprets patterns and subjects as strings of UTF-8 code + units instead of individual 8-bit characters. This causes both the pat- + tern and any data against which it is matched to be checked for UTF-8 + validity. If the data string is very long, such a check might use suf- + ficiently many resources as to cause your application to lose perfor- + mance. + + One way of guarding against this possibility is to use the pcre2_pat- + tern_info() function to check the compiled pattern's options for + PCRE2_UTF. Alternatively, you can set the PCRE2_NEVER_UTF option when + calling pcre2_compile(). This causes a compile time error if the pat- + tern contains a UTF-setting sequence. 
+ + The use of Unicode properties for character types such as \d can also + be enabled from within the pattern, by specifying "(*UCP)". This fea- + ture can be disallowed by setting the PCRE2_NEVER_UCP option. + + If your application is one that supports UTF, be aware that validity + checking can take time. If the same data string is to be matched many + times, you can use the PCRE2_NO_UTF_CHECK option for the second and + subsequent matches to avoid running redundant checks. + + The use of the \C escape sequence in a UTF-8 or UTF-16 pattern can lead + to problems, because it may leave the current matching point in the + middle of a multi-code-unit character. The PCRE2_NEVER_BACKSLASH_C op- + tion can be used by an application to lock out the use of \C, causing a + compile-time error if it is encountered. It is also possible to build + PCRE2 with the use of \C permanently disabled. + + Another way that performance can be hit is by running a pattern that + has a very large search tree against a string that will never match. + Nested unlimited repeats in a pattern are a common example. PCRE2 pro- + vides some protection against this: see the pcre2_set_match_limit() + function in the pcre2api page. There is a similar function called + pcre2_set_depth_limit() that can be used to restrict the amount of mem- + ory that is used. + + +USER DOCUMENTATION + + The user documentation for PCRE2 comprises a number of different sec- + tions. In the "man" format, each of these is a separate "man page". In + the HTML format, each is a separate page, linked from the index page. + In the plain text format, the descriptions of the pcre2grep and + pcre2test programs are in files called pcre2grep.txt and pcre2test.txt, + respectively. The remaining sections, except for the pcre2demo section + (which is a program listing), and the short pages for individual func- + tions, are concatenated in pcre2.txt, for ease of searching. 
The sec- + tions are as follows: + + pcre2 this document + pcre2-config show PCRE2 installation configuration information + pcre2api details of PCRE2's native C API + pcre2build building PCRE2 + pcre2callout details of the pattern callout feature + pcre2compat discussion of Perl compatibility + pcre2convert details of pattern conversion functions + pcre2demo a demonstration C program that uses PCRE2 + pcre2grep description of the pcre2grep command (8-bit only) + pcre2jit discussion of just-in-time optimization support + pcre2limits details of size and other limits + pcre2matching discussion of the two matching algorithms + pcre2partial details of the partial matching facility + pcre2pattern syntax and semantics of supported regular + expression patterns + pcre2perform discussion of performance issues + pcre2posix the POSIX-compatible C API for the 8-bit library + pcre2sample discussion of the pcre2demo program + pcre2serialize details of pattern serialization + pcre2syntax quick syntax reference + pcre2test description of the pcre2test command + pcre2unicode discussion of Unicode and UTF support + + In the "man" and HTML formats, there is also a short page for each C + library function, listing its arguments and results. + + +AUTHORS + + The current maintainers of PCRE2 are Nicholas Wilson and Zoltan Her- + czeg. + + PCRE2 was written by Philip Hazel, of the University Computing Service, + Cambridge, England. Many others have also contributed. + + To contact the maintainers, please use the GitHub issues tracker or + PCRE2 mailing list, as described at the project page: + https://github.com/PCRE2Project/pcre2 + + +REVISION + + Last updated: 18 December 2024 + Copyright (c) 1997-2021 University of Cambridge. 
+ + +PCRE2 10.45 18 December 2024 PCRE2(3) +------------------------------------------------------------------------------ + + +PCRE2API(3) Library Functions Manual PCRE2API(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + #include <pcre2.h> + + PCRE2 is a new API for PCRE, starting at release 10.0. This document + contains a description of all its native functions. See the pcre2 docu- + ment for an overview of all the PCRE2 documentation. + + +PCRE2 NATIVE API BASIC FUNCTIONS + + pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); + + void pcre2_code_free(pcre2_code *code); + + pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); + + pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); + + int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); + + void pcre2_match_data_free(pcre2_match_data *match_data); + + +PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS + + PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); + + uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); + + PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); + + +PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS + + pcre2_general_context *pcre2_general_context_create( 
+ void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); + + pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); + + void pcre2_general_context_free(pcre2_general_context *gcontext); + + +PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS + + pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); + + pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); + + void pcre2_compile_context_free(pcre2_compile_context *ccontext); + + int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); + + int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); + + int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); + + int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); + + int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); + + int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); + + int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); + + +PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS + + pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); + + pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); + + void pcre2_match_context_free(pcre2_match_context *mcontext); + + int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); + + int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + 
int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); + + int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); + + int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); + + int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); + + int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); + + int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); + + +PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS + + int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); + + int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); + + void pcre2_substring_free(PCRE2_UCHAR *buffer); + + int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); + + int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); + + int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); + + int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); + + int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); + + int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); + + void pcre2_substring_list_free(PCRE2_UCHAR **list); + + int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); + + +PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION + + int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + 
uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); + + +PCRE2 NATIVE API JIT FUNCTIONS + + int pcre2_jit_compile(pcre2_code *code, uint32_t options); + + int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); + + pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); + + void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); + + void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); + + +PCRE2 NATIVE API SERIALIZATION FUNCTIONS + + int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); + + int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); + + void pcre2_serialize_free(uint8_t *bytes); + + int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); + + +PCRE2 NATIVE API AUXILIARY FUNCTIONS + + pcre2_code *pcre2_code_copy(const pcre2_code *code); + + pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); + + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); + + const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); + + void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); + + int pcre2_pattern_info(const pcre2_code *code, uint32_t what, + void *where); + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + int 
pcre2_config(uint32_t what, void *where); + + +PCRE2 NATIVE API OBSOLETE FUNCTIONS + + int pcre2_set_recursion_limit(pcre2_match_context *mcontext, + uint32_t value); + + int pcre2_set_recursion_memory_management( + pcre2_match_context *mcontext, + void *(*private_malloc)(size_t, void *), + void (*private_free)(void *, void *), void *memory_data); + + These functions became obsolete at release 10.30 and are retained only + for backward compatibility. They should not be used in new code. The + first is replaced by pcre2_set_depth_limit(); the second is no longer + needed and has no effect (it always returns zero). + + +PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS + + pcre2_convert_context *pcre2_convert_context_create( + pcre2_general_context *gcontext); + + pcre2_convert_context *pcre2_convert_context_copy( + pcre2_convert_context *cvcontext); + + void pcre2_convert_context_free(pcre2_convert_context *cvcontext); + + int pcre2_set_glob_escape(pcre2_convert_context *cvcontext, + uint32_t escape_char); + + int pcre2_set_glob_separator(pcre2_convert_context *cvcontext, + uint32_t separator_char); + + int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, PCRE2_UCHAR **buffer, + PCRE2_SIZE *blength, pcre2_convert_context *cvcontext); + + void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern); + + These functions provide a way of converting non-PCRE2 patterns into + patterns that can be processed by pcre2_compile(). This facility is ex- + perimental and may be changed in future releases. At present, "globs" + and POSIX basic and extended patterns can be converted. Details are + given in the pcre2convert documentation. + + +PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES + + There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit + code units, respectively. However, there is just one header file, + pcre2.h. This contains the function prototypes and other definitions + for all three libraries. 
One, two, or all three can be installed simul- + taneously. On Unix-like systems the libraries are called libpcre2-8, + libpcre2-16, and libpcre2-32, and they can also co-exist with the orig- + inal PCRE libraries. Every PCRE2 function comes in three different + forms, one for each library, for example: + + pcre2_compile_8() + pcre2_compile_16() + pcre2_compile_32() + + There are also three different sets of data types: + + PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32 + PCRE2_SPTR8, PCRE2_SPTR16, PCRE2_SPTR32 + + The UCHAR types define unsigned code units of the appropriate widths. + For example, PCRE2_UCHAR16 is usually defined as `uint16_t'. The SPTR + types are pointers to constants of the equivalent UCHAR types, that is, + they are pointers to vectors of unsigned code units. + + Character strings are passed to a PCRE2 library as sequences of un- + signed integers in code units of the appropriate width. The length of a + string may be given as a number of code units, or the string may be + specified as zero-terminated. + + Many applications use only one code unit width. For their convenience, + macros are defined whose names are the generic forms such as pcre2_com- + pile() and PCRE2_SPTR. These macros use the value of the macro + PCRE2_CODE_UNIT_WIDTH to generate the appropriate width-specific func- + tion and macro names. PCRE2_CODE_UNIT_WIDTH is not defined by default. + An application must define it to be 8, 16, or 32 before including + pcre2.h in order to make use of the generic names. + + Applications that use more than one code unit width can be linked with + more than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to + be 0 before including pcre2.h, and then use the real function names. + Any code that is to be included in an environment where the value of + PCRE2_CODE_UNIT_WIDTH is unknown should also use the real function + names. (Unfortunately, it is not possible in C code to save and restore + the value of a macro.) 
+ + If PCRE2_CODE_UNIT_WIDTH is not defined before including pcre2.h, a + compiler error occurs. + + When using multiple libraries in an application, you must take care + when processing any particular pattern to use only functions from a + single library. For example, if you want to run a match using a pat- + tern that was compiled with pcre2_compile_16(), you must do so with + pcre2_match_16(), not pcre2_match_8() or pcre2_match_32(). + + In the function summaries above, and in the rest of this document and + other PCRE2 documents, functions and data types are described using + their generic names, without the _8, _16, or _32 suffix. + + +PCRE2 API OVERVIEW + + PCRE2 has its own native API, which is described in this document. + There are also some wrapper functions for the 8-bit library that corre- + spond to the POSIX regular expression API, but they do not give access + to all the functionality of PCRE2 and they are not thread-safe. They + are described in the pcre2posix documentation. Both these APIs define a + set of C function calls. + + The native API C data types, function prototypes, option values, and + error codes are defined in the header file pcre2.h, which also contains + definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release + numbers for the library. Applications can use these to include support + for different releases of PCRE2. + + In a Windows environment, if you want to statically link an application + program against a non-dll PCRE2 library, you must define PCRE2_STATIC + before including pcre2.h. + + The functions pcre2_compile() and pcre2_match() are used for compiling + and matching regular expressions in a Perl-compatible manner. A sample + program that demonstrates the simplest way of using them is provided in + the file called pcre2demo.c in the PCRE2 source distribution. A listing + of this program is given in the pcre2demo documentation, and the + pcre2sample documentation describes how to compile and run it. 
+ + The compiling and matching functions recognize various options that are + passed as bits in an options argument. There are also some more compli- + cated parameters such as custom memory management functions and re- + source limits that are passed in "contexts" (which are just memory + blocks, described below). Simple applications do not need to make use + of contexts. + + Just-in-time (JIT) compiler support is an optional feature of PCRE2 + that can be built in appropriate hardware environments. It greatly + speeds up the matching performance of many patterns. Programs can re- + quest that it be used if available by calling pcre2_jit_compile() after + a pattern has been successfully compiled by pcre2_compile(). This does + nothing if JIT support is not available. + + More complicated programs might need to make use of the specialist + functions pcre2_jit_stack_create(), pcre2_jit_stack_free(), and + pcre2_jit_stack_assign() in order to control the JIT code's memory us- + age. + + JIT matching is automatically used by pcre2_match() if it is available, + unless the PCRE2_NO_JIT option is set. There is also a direct interface + for JIT matching, which gives improved performance at the expense of + less sanity checking. The JIT-specific functions are discussed in the + pcre2jit documentation. + + A second matching function, pcre2_dfa_match(), which is not Perl-com- + patible, is also provided. This uses a different algorithm for the + matching. The alternative algorithm finds all possible matches (at a + given point in the subject), and scans the subject just once (unless + there are lookaround assertions). However, this algorithm does not re- + turn captured substrings. A description of the two matching algorithms + and their advantages and disadvantages is given in the pcre2matching + documentation. There is no JIT support for pcre2_dfa_match(). 
+ + In addition to the main compiling and matching functions, there are + convenience functions for extracting captured substrings from a subject + string that has been matched by pcre2_match(). They are: + + pcre2_substring_copy_byname() + pcre2_substring_copy_bynumber() + pcre2_substring_get_byname() + pcre2_substring_get_bynumber() + pcre2_substring_list_get() + pcre2_substring_length_byname() + pcre2_substring_length_bynumber() + pcre2_substring_nametable_scan() + pcre2_substring_number_from_name() + + pcre2_substring_free() and pcre2_substring_list_free() are also pro- + vided, to free memory used for extracted strings. If either of these + functions is called with a NULL argument, the function returns immedi- + ately without doing anything. + + The function pcre2_substitute() can be called to match a pattern and + return a copy of the subject string with substitutions for parts that + were matched. + + Functions whose names begin with pcre2_serialize_ are used for saving + compiled patterns on disc or elsewhere, and reloading them later. + + Finally, there are functions for finding out information about a com- + piled pattern (pcre2_pattern_info()) and about the configuration with + which PCRE2 was built (pcre2_config()). + + Functions with names ending with _free() are used for freeing memory + blocks of various sorts. In all cases, if one of these functions is + called with a NULL argument, it does nothing. + + +STRING LENGTHS AND OFFSETS + + The PCRE2 API uses string lengths and offsets into strings of code + units in several places. These values are always of type PCRE2_SIZE, + which is an unsigned integer type, currently always defined as size_t. + The largest value that can be stored in such a type (that is + ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated + strings and unset offsets. Therefore, the longest string that can be + handled is one less than this maximum. Note that string lengths are al- + ways given in code units. 
Only in the 8-bit library is such a length + the same as the number of bytes in the string. + + +NEWLINES + + PCRE2 supports five different conventions for indicating line breaks in + strings: a single CR (carriage return) character, a single LF (line- + feed) character, the two-character sequence CRLF, any of the three pre- + ceding, or any Unicode newline sequence. The Unicode newline sequences + are the three just mentioned, plus the single characters VT (vertical + tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line + separator, U+2028), and PS (paragraph separator, U+2029). + + Each of the first three conventions is used by at least one operating + system as its standard newline sequence. When PCRE2 is built, a default + can be specified. If it is not, the default is set to LF, which is the + Unix standard. However, the newline convention can be changed by an ap- + plication when calling pcre2_compile(), or it can be specified by spe- + cial text at the start of the pattern itself; this overrides any other + settings. See the pcre2pattern page for details of the special charac- + ter sequences. + + In the PCRE2 documentation the word "newline" is used to mean "the + character or pair of characters that indicate a line break". The choice + of newline convention affects the handling of the dot, circumflex, and + dollar metacharacters, the handling of #-comments in /x mode, and, when + CRLF is a recognized line ending sequence, the match position advance- + ment for a non-anchored pattern. There is more detail about this in the + section on pcre2_match() options below. + + The choice of newline convention does not affect the interpretation of + the \n or \r escape sequences, nor does it affect what \R matches; this + has its own separate convention. + + +MULTITHREADING + + In a multithreaded application it is important to keep thread-specific + data separate from data that can be shared between threads. 
The PCRE2 + library code itself is thread-safe: it contains no static or global + variables. The API is designed to be fairly simple for non-threaded ap- + plications while at the same time ensuring that multithreaded applica- + tions can use it. + + There are several different blocks of data that are used to pass infor- + mation between the application and the PCRE2 libraries. + + The compiled pattern + + A pointer to the compiled form of a pattern is returned to the user + when pcre2_compile() is successful. The data in the compiled pattern is + fixed, and does not change when the pattern is matched. Therefore, it + is thread-safe, that is, the same compiled pattern can be used by more + than one thread simultaneously. For example, an application can compile + all its patterns at the start, before forking off multiple threads that + use them. However, if the just-in-time (JIT) optimization feature is + being used, it needs separate memory stack areas for each thread. See + the pcre2jit documentation for more details. + + In a more complicated situation, where patterns are compiled only when + they are first needed, but are still shared between threads, pointers + to compiled patterns must be protected from simultaneous writing by + multiple threads. This is somewhat tricky to do correctly. If you know + that writing to a pointer is atomic in your environment, you can use + logic like this: + + Get a read-only (shared) lock (mutex) for pointer + if (pointer == NULL) + { + Get a write (unique) lock for pointer + if (pointer == NULL) pointer = pcre2_compile(... + } + Release the lock + Use pointer in pcre2_match() + + Of course, testing for compilation errors should also be included in + the code. + + The reason for checking the pointer a second time is as follows: Sev- + eral threads may have acquired the shared lock and tested the pointer + for being NULL, but only one of them will be given the write lock, with + the rest kept waiting. 
The winning thread will compile the pattern and + store the result. After this thread releases the write lock, another + thread will get it, and if it does not retest pointer for being NULL, + will recompile the pattern and overwrite the pointer, creating a memory + leak and possibly causing other issues. + + In an environment where writing to a pointer may not be atomic, the + above logic is not sufficient. The thread that is doing the compiling + may be descheduled after writing only part of the pointer, which could + cause other threads to use an invalid value. Instead of checking the + pointer itself, a separate "pointer is valid" flag (that can be updated + atomically) must be used: + + Get a read-only (shared) lock (mutex) for pointer + if (!pointer_is_valid) + { + Get a write (unique) lock for pointer + if (!pointer_is_valid) + { + pointer = pcre2_compile(... + pointer_is_valid = TRUE + } + } + Release the lock + Use pointer in pcre2_match() + + If JIT is being used, but the JIT compilation is not being done immedi- + ately (perhaps waiting to see if the pattern is used often enough), + similar logic is required. JIT compilation updates a value within the + compiled code block, so a thread must gain unique write access to the + pointer before calling pcre2_jit_compile(). Alternatively, + pcre2_code_copy() or pcre2_code_copy_with_tables() can be used to ob- + tain a private copy of the compiled code before calling the JIT com- + piler. + + Context blocks + + The next main section below introduces the idea of "contexts" in which + PCRE2 functions are called. A context is nothing more than a collection + of parameters that control the way PCRE2 operates. Grouping a number of + parameters together in a context is a convenient way of passing them to + a PCRE2 function without using lots of arguments. The parameters that + are stored in contexts are in some sense "advanced features" of the + API. Many straightforward applications will not need to use contexts. 
+ + In a multithreaded application, if the parameters in a context are val- + ues that are never changed, the same context can be used by all the + threads. However, if any thread needs to change any value in a context, + it must make its own thread-specific copy. + + Match blocks + + The matching functions need a block of memory for storing the results + of a match. This includes details of what was matched, as well as addi- + tional information such as the name of a (*MARK) setting. Each thread + must provide its own copy of this memory. + + +PCRE2 CONTEXTS + + Some PCRE2 functions have a lot of parameters, many of which are used + only by specialist applications, for example, those that use custom + memory management or non-standard character tables. To keep function + argument lists at a reasonable size, and at the same time to keep the + API extensible, "uncommon" parameters are passed to certain functions + in a context instead of directly. A context is just a block of memory + that holds the parameter values. Applications that do not need to ad- + just any of the context parameters can pass NULL when a context pointer + is required. + + There are three different types of context: a general context that is + relevant for several PCRE2 operations, a compile-time context, and a + match-time context. + + The general context + + At present, this context just contains pointers to (and data for) ex- + ternal memory management functions that are called from several places + in the PCRE2 library. The context is named `general' rather than + specifically `memory' because in future other fields may be added. If + you do not want to supply your own custom memory management functions, + you do not need to bother with a general context. 
A general context is + created by: + + pcre2_general_context *pcre2_general_context_create( + void *(*private_malloc)(PCRE2_SIZE, void *), + void (*private_free)(void *, void *), void *memory_data); + + The two function pointers specify custom memory management functions, + whose prototypes are: + + void *private_malloc(PCRE2_SIZE, void *); + void private_free(void *, void *); + + Whenever code in PCRE2 calls these functions, the final argument is the + value of memory_data. Either of the first two arguments of the creation + function may be NULL, in which case the system memory management func- + tions malloc() and free() are used. (This is not currently useful, as + there are no other fields in a general context, but in future there + might be.) The private_malloc() function is used (if supplied) to ob- + tain memory for storing the context, and all three values are saved as + part of the context. + + Whenever PCRE2 creates a data block of any kind, the block contains a + pointer to the free() function that matches the malloc() function that + was used. When the time comes to free the block, this function is + called. + + A general context can be copied by calling: + + pcre2_general_context *pcre2_general_context_copy( + pcre2_general_context *gcontext); + + The memory used for a general context should be freed by calling: + + void pcre2_general_context_free(pcre2_general_context *gcontext); + + If this function is passed a NULL argument, it returns immediately + without doing anything. 
+ + The compile context + + A compile context is required if you want to provide an external func- + tion for stack checking during compilation or to change the default + values of any of the following compile-time parameters: + + What \R matches (Unicode newlines or CR, LF, CRLF only) + PCRE2's character tables + The newline character sequence + The compile time nested parentheses limit + The maximum length of the pattern string + The extra options bits (none set by default) + Which performance optimizations the compiler should apply + + A compile context is also required if you are using custom memory man- + agement. If none of these apply, just pass NULL as the context argu- + ment of pcre2_compile(). + + A compile context is created, copied, and freed by the following func- + tions: + + pcre2_compile_context *pcre2_compile_context_create( + pcre2_general_context *gcontext); + + pcre2_compile_context *pcre2_compile_context_copy( + pcre2_compile_context *ccontext); + + void pcre2_compile_context_free(pcre2_compile_context *ccontext); + + A compile context is created with default values for its parameters. + These can be changed by calling the following functions, which return 0 + on success, or PCRE2_ERROR_BADDATA if invalid data is detected. + + int pcre2_set_bsr(pcre2_compile_context *ccontext, + uint32_t value); + + The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only + CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any + Unicode line ending sequence. The value is used by the JIT compiler and + by the two interpreted matching functions, pcre2_match() and + pcre2_dfa_match(). + + int pcre2_set_character_tables(pcre2_compile_context *ccontext, + const uint8_t *tables); + + The value must be the result of a call to pcre2_maketables(), whose + only argument is a general context. This function builds a set of char- + acter tables in the current locale. 
+ + int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, + uint32_t extra_options); + + As PCRE2 has developed, almost all the 32 option bits that are avail- + able in the options argument of pcre2_compile() have been used up. To + avoid running out, the compile context contains a set of extra option + bits which are used for some newer, assumed rarer, options. This func- + tion sets those bits. It always sets all the bits (either on or off). + It does not modify any existing setting. The available options are de- + fined in the section entitled "Extra compile options" below. + + int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, + PCRE2_SIZE value); + + This sets a maximum length, in code units, for any pattern string that + is compiled with this context. If the pattern is longer, an error is + generated. This facility is provided so that applications that accept + patterns from external sources can limit their size. The default is the + largest number that a PCRE2_SIZE variable can hold, which is effec- + tively unlimited. + + int pcre2_set_max_pattern_compiled_length( + pcre2_compile_context *ccontext, PCRE2_SIZE value); + + This sets a maximum size, in bytes, for the memory needed to hold the + compiled version of a pattern that is compiled with this context. If + the pattern needs more memory, an error is generated. This facility is + provided so that applications that accept patterns from external + sources can limit the amount of memory they use. The default is the + largest number that a PCRE2_SIZE variable can hold, which is effec- + tively unlimited. + + int pcre2_set_max_varlookbehind(pcre2_compile_context *ccontext, + uint32_t value); + + This sets a maximum length for the number of characters matched by a + variable-length lookbehind assertion. The default is set when PCRE2 is + built, with the ultimate default being 255, the same as Perl. Lookbe- + hind assertions without a bounding length are not supported. 
+ + int pcre2_set_newline(pcre2_compile_context *ccontext, + uint32_t value); + + This specifies which characters or character sequences are to be recog- + nized as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage + return only), PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the + two-character sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any + of the above), PCRE2_NEWLINE_ANY (any Unicode newline sequence), or + PCRE2_NEWLINE_NUL (the NUL character, that is a binary zero). + + A pattern can override the value set in the compile context by starting + with a sequence such as (*CRLF). See the pcre2pattern page for details. + + When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EX- + TENDED_MORE option, the newline convention affects the recognition of + the end of internal comments starting with #. The value is saved with + the compiled pattern for subsequent use by the JIT compiler and by the + two interpreted matching functions, pcre2_match() and + pcre2_dfa_match(). + + int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, + uint32_t value); + + This parameter adjusts the limit, set when PCRE2 is built (default + 250), on the depth of parenthesis nesting in a pattern. This limit + stops rogue patterns using up too much system stack when being com- + piled. The limit applies to parentheses of all kinds, not just captur- + ing parentheses. + + int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, + int (*guard_function)(uint32_t, void *), void *user_data); + + There is at least one application that runs PCRE2 in threads with very + limited system stack, where running out of stack is to be avoided at + all costs. The parenthesis limit above cannot take account of how much + stack is actually available during compilation. For a finer control, + you can supply a function that is called whenever pcre2_compile() + starts to compile a parenthesized part of a pattern. 
This function can + check the actual stack size (or anything else that it wants to, of + course). + + The first argument to the callout function gives the current depth of + nesting, and the second is user data that is set up by the last argu- + ment of pcre2_set_compile_recursion_guard(). The callout function + should return zero if all is well, or non-zero to force an error. + + int pcre2_set_optimize(pcre2_compile_context *ccontext, + uint32_t directive); + + PCRE2 can apply various performance optimizations during compilation, + in order to make matching faster. For example, the compiler might con- + vert some regex constructs into an equivalent construct which + pcre2_match() can execute faster. By default, all available optimiza- + tions are enabled. However, in rare cases, one might wish to disable + specific optimizations. For example, if it is known that some optimiza- + tions cannot benefit a certain regex, it might be desirable to disable + them, in order to speed up compilation. + + The permitted values of directive are as follows: + + PCRE2_OPTIMIZATION_FULL + + Enable all optional performance optimizations. This is the default + value. + + PCRE2_OPTIMIZATION_NONE + + Disable all optional performance optimizations. + + PCRE2_AUTO_POSSESS + PCRE2_AUTO_POSSESS_OFF + + Enable/disable "auto-possessification" of variable quantifiers such as + * and +. This optimization, for example, turns a+b into a++b in order + to avoid backtracks into a+ that can never be successful. However, if + callouts are in use, auto-possessification means that some callouts are + never taken. You can disable this optimization if you want the matching + functions to do a full, unoptimized search and run all the callouts. + + PCRE2_DOTSTAR_ANCHOR + PCRE2_DOTSTAR_ANCHOR_OFF + + Enable/disable an optimization that is applied when .* is the first + significant item in a top-level branch of a pattern, and all the other + branches also start with .* or with \A or \G or ^. 
Such a pattern is + automatically anchored if PCRE2_DOTALL is set for all the .* items and + PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that + any match must start either at the start of the subject or following a + newline is remembered. Like other optimizations, this can cause call- + outs to be skipped. + + Dotstar anchor optimization is automatically disabled for .* if it is + inside an atomic group or a capture group that is the subject of a + backreference, or if the pattern contains (*PRUNE) or (*SKIP). + + PCRE2_START_OPTIMIZE + PCRE2_START_OPTIMIZE_OFF + + Enable/disable optimizations which cause matching functions to scan the + subject string for specific code unit values before attempting a match. + For example, if it is known that an unanchored match must start with a + specific value, the matching code searches the subject for that value, + and fails immediately if it cannot find it, without actually running + the main matching function. This means that a special item such as + (*COMMIT) at the start of a pattern is not considered until after a + suitable starting point for the match has been found. Also, when call- + outs or (*MARK) items are in use, these "start-up" optimizations can + cause them to be skipped if the pattern is never actually used. The + start-up optimizations are in effect a pre-scan of the subject that + takes place before the pattern is run. + + Disabling start-up optimizations ensures that in cases where the result + is "no match", the callouts do occur, and that items such as (*COMMIT) + and (*MARK) are considered at every possible starting position in the + subject string. + + Disabling start-up optimizations may change the outcome of a matching + operation. Consider the pattern + + (*COMMIT)ABC + + When this is compiled, PCRE2 records the fact that a match must start + with the character "A". Suppose the subject string is "DEFABC". 
The + start-up optimization scans along the subject, finds "A" and runs the + first match attempt from there. The (*COMMIT) item means that the pat- + tern must match the current starting position, which in this case, it + does. However, if the same match is run without start-up optimizations, + the initial scan along the subject string does not happen. The first + match attempt is run starting from "D" and when this fails, (*COMMIT) + prevents any further matches being tried, so the overall result is "no + match". + + Another start-up optimization makes use of a minimum length for a + matching subject, which is recorded when possible. Consider the pattern + + (*MARK:1)B(*MARK:2)(X|Y) + + The minimum length for a match is two characters. If the subject is + "XXBB", the "starting character" optimization skips "XX", then tries to + match "BB", which is long enough. In the process, (*MARK:2) is encoun- + tered and remembered. When the match attempt fails, the next "B" is + found, but there is only one character left, so there are no more at- + tempts, and "no match" is returned with the "last mark seen" set to + "2". Without start-up optimizations, however, matches are tried at + every possible starting position, including at the end of the subject, + where (*MARK:1) is encountered, but there is no "B", so the "last mark + seen" that is returned is "1". In this case, the optimizations do not + affect the overall match result, which is still "no match", but they do + affect the auxiliary information that is returned. 
+ + The match context + + A match context is required if you want to: + + Set up a callout function + Set an offset limit for matching an unanchored pattern + Change the limit on the amount of heap used when matching + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management specifically for the match + + If none of these apply, just pass NULL as the context argument of + pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match(). + + A match context is created, copied, and freed by the following func- + tions: + + pcre2_match_context *pcre2_match_context_create( + pcre2_general_context *gcontext); + + pcre2_match_context *pcre2_match_context_copy( + pcre2_match_context *mcontext); + + void pcre2_match_context_free(pcre2_match_context *mcontext); + + A match context is created with default values for its parameters. + These can be changed by calling the following functions, which return 0 + on success, or PCRE2_ERROR_BADDATA if invalid data is detected. + + int pcre2_set_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_callout_block *, void *), + void *callout_data); + + This sets up a callout function for PCRE2 to call at specified points + during a matching operation. Details are given in the pcre2callout doc- + umentation. + + int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); + + This sets up a callout function for PCRE2 to call after each substitu- + tion made by pcre2_substitute(). Details are given in the section enti- + tled "Creating a new string with substitutions" below. + + int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); + + This sets up a callout function for PCRE2 to call when performing case + transformations inside pcre2_substitute(). 
Details are given in the + section entitled "Creating a new string with substitutions" below. + + int pcre2_set_offset_limit(pcre2_match_context *mcontext, + PCRE2_SIZE value); + + The offset_limit parameter limits how far an unanchored search can ad- + vance in the subject string. The default value is PCRE2_UNSET. The + pcre2_match() and pcre2_dfa_match() functions return PCRE2_ERROR_NO- + MATCH if a match with a starting point before or at the given offset is + not found. The pcre2_substitute() function makes no more substitutions. + + For example, if the pattern /abc/ is matched against "123abc" with an + offset limit less than 3, the result is PCRE2_ERROR_NOMATCH. A match + can never be found if the startoffset argument of pcre2_match(), + pcre2_dfa_match(), or pcre2_substitute() is greater than the offset + limit set in the match context. + + When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT op- + tion when calling pcre2_compile() so that when JIT is in use, different + code can be compiled. If a match is started with a non-default offset + limit when PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. + + The offset limit facility can be used to track progress when searching + large subject strings or to limit the extent of global substitutions. + See also the PCRE2_FIRSTLINE option, which requires a match to start + before or at the first newline that follows the start of matching in + the subject. If this is set with an offset limit, a match must occur in + the first line and also within the offset limit. In other words, + whichever limit comes first is used. + + int pcre2_set_heap_limit(pcre2_match_context *mcontext, + uint32_t value); + + The heap_limit parameter specifies, in units of kibibytes (1024 bytes), + the maximum amount of heap memory that pcre2_match() may use to hold + backtracking information when running an interpretive match.
This limit + also applies to pcre2_dfa_match(), which may use the heap when process- + ing patterns with a lot of nested pattern recursion or lookarounds or + atomic groups. This limit does not apply to matching with the JIT opti- + mization, which has its own memory control arrangements (see the + pcre2jit documentation for more details). If the limit is reached, the + negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default + limit can be set when PCRE2 is built; if it is not, the default is set + very large and is essentially unlimited. + + A value for the heap limit may also be supplied by an item at the start + of a pattern of the form + + (*LIMIT_HEAP=ddd) + + where ddd is a decimal number. However, such a setting is ignored un- + less ddd is less than the limit set by the caller of pcre2_match() or, + if no such limit is set, less than the default. + + The pcre2_match() function always needs some heap memory, so setting a + value of zero guarantees a "heap limit exceeded" error. Details of how + pcre2_match() uses the heap are given in the pcre2perform documenta- + tion. + + For pcre2_dfa_match(), a vector on the system stack is used when pro- + cessing pattern recursions, lookarounds, or atomic groups, and only if + this is not big enough is heap memory used. In this case, setting a + value of zero disables the use of the heap. + + int pcre2_set_match_limit(pcre2_match_context *mcontext, + uint32_t value); + + The match_limit parameter provides a means of preventing PCRE2 from us- + ing up too many computing resources when processing patterns that are + not going to match, but which have a very large number of possibilities + in their search trees. The classic example is a pattern that uses + nested unlimited repeats. + + There is an internal counter in pcre2_match() that is incremented each + time round its main matching loop. If this value reaches the match + limit, pcre2_match() returns the negative value PCRE2_ERROR_MATCHLIMIT. 
+ This has the effect of limiting the amount of backtracking that can + take place. For patterns that are not anchored, the count restarts from + zero for each position in the subject string. This limit also applies + to pcre2_dfa_match(), though the counting is done in a different way. + + When pcre2_match() is called with a pattern that was successfully + processed by pcre2_jit_compile(), the way in which matching is executed + is entirely different. However, there is still the possibility of run- + away matching that goes on for a very long time, and so the match_limit + value is also used in this case (but in a different way) to limit how + long the matching can continue. + + The default value for the limit can be set when PCRE2 is built; the de- + fault is 10 million, which handles all but the most extreme cases. A + value for the match limit may also be supplied by an item at the start + of a pattern of the form + + (*LIMIT_MATCH=ddd) + + where ddd is a decimal number. However, such a setting is ignored un- + less ddd is less than the limit set by the caller of pcre2_match() or + pcre2_dfa_match() or, if no such limit is set, less than the default. + + int pcre2_set_depth_limit(pcre2_match_context *mcontext, + uint32_t value); + + This parameter limits the depth of nested backtracking in + pcre2_match(). Each time a nested backtracking point is passed, a new + memory frame is used to remember the state of matching at that point. + Thus, this parameter indirectly limits the amount of memory that is + used in a match. However, because the size of each memory frame depends + on the number of capturing parentheses, the actual memory limit varies + from pattern to pattern. This limit was more useful in versions before + 10.30, where function recursion was used for backtracking. + + The depth limit is not relevant, and is ignored, when matching is done + using JIT compiled code. 
However, it is supported by pcre2_dfa_match(), + which uses it to limit the depth of nested internal recursive function + calls that implement atomic groups, lookaround assertions, and pattern + recursions. This limits, indirectly, the amount of system stack that is + used. It was more useful in versions before 10.32, when stack memory + was used for local workspace vectors for recursive function calls. From + version 10.32, only local variables are allocated on the stack and as + each call uses only a few hundred bytes, even a small stack can support + quite a lot of recursion. + + If the depth of internal recursive function calls is great enough, lo- + cal workspace vectors are allocated on the heap from version 10.32 on- + wards, so the depth limit also indirectly limits the amount of heap + memory that is used. A recursive pattern such as /(.(?2))((?1)|)/, when + matched to a very long string using pcre2_dfa_match(), can use a great + deal of memory. However, it is probably better to limit heap usage di- + rectly by calling pcre2_set_heap_limit(). + + The default value for the depth limit can be set when PCRE2 is built; + if it is not, the default is set to the same value as the default for + the match limit. If the limit is exceeded, pcre2_match() or + pcre2_dfa_match() returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth + limit may also be supplied by an item at the start of a pattern of the + form + + (*LIMIT_DEPTH=ddd) + + where ddd is a decimal number. However, such a setting is ignored un- + less ddd is less than the limit set by the caller of pcre2_match() or + pcre2_dfa_match() or, if no such limit is set, less than the default. + + +CHECKING BUILD-TIME OPTIONS + + int pcre2_config(uint32_t what, void *where); + + The function pcre2_config() makes it possible for a PCRE2 client to + find the value of certain configuration parameters and to discover + which optional features have been compiled into the PCRE2 library. 
The + pcre2build documentation has more details about these features. + + The first argument for pcre2_config() specifies which information is + required. The second argument is a pointer to memory into which the in- + formation is placed. If NULL is passed, the function returns the amount + of memory that is needed for the requested information. For calls that + return numerical values, the value is in bytes; when requesting these + values, where should point to appropriately aligned memory. For calls + that return strings, the required length is given in code units, not + counting the terminating zero. + + When requesting information, the returned value from pcre2_config() is + non-negative on success, or the negative error code PCRE2_ERROR_BADOP- + TION if the value in the first argument is not recognized. The follow- + ing information is available: + + PCRE2_CONFIG_BSR + + The output is a uint32_t integer whose value indicates what character + sequences the \R escape sequence matches by default. A value of + PCRE2_BSR_UNICODE means that \R matches any Unicode line ending se- + quence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, + or CRLF. The default can be overridden when a pattern is compiled. + + PCRE2_CONFIG_COMPILED_WIDTHS + + The output is a uint32_t integer whose lower bits indicate which code + unit widths were selected when PCRE2 was built. The 1-bit indicates + 8-bit support, and the 2-bit and 4-bit indicate 16-bit and 32-bit sup- + port, respectively. + + PCRE2_CONFIG_DEPTHLIMIT + + The output is a uint32_t integer that gives the default limit for the + depth of nested backtracking in pcre2_match() or the depth of nested + recursions, lookarounds, and atomic groups in pcre2_dfa_match(). Fur- + ther details are given with pcre2_set_depth_limit() above. 
+ + PCRE2_CONFIG_HEAPLIMIT + + The output is a uint32_t integer that gives, in kibibytes, the default + limit for the amount of heap memory used by pcre2_match() or + pcre2_dfa_match(). Further details are given with + pcre2_set_heap_limit() above. + + PCRE2_CONFIG_JIT + + The output is a uint32_t integer that is set to one if support for + just-in-time compiling is included in the library; otherwise it is set + to zero. Note that having the support in the library does not guarantee + that JIT will be used for any given match, and neither does it guaran- + tee that JIT will actually be able to function, because it may not be + able to allocate executable memory in some environments. There is a + special call to pcre2_jit_compile() that can be used to check this. See + the pcre2jit documentation for more details. + + PCRE2_CONFIG_JITTARGET + + The where argument should point to a buffer that is at least 48 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) The buffer is filled with a + string that contains the name of the architecture for which the JIT + compiler is configured, for example "x86 32bit (little endian + un- + aligned)". If JIT support is not available, PCRE2_ERROR_BADOPTION is + returned, otherwise the number of code units used is returned. This is + the length of the string, plus one unit for the terminating zero. + + PCRE2_CONFIG_LINKSIZE + + The output is a uint32_t integer that contains the number of bytes used + for internal linkage in compiled regular expressions. When PCRE2 is + configured, the value can be set to 2, 3, or 4, with the default being + 2. This is the value that is returned by pcre2_config(). However, when + the 16-bit library is compiled, a value of 3 is rounded up to 4, and + when the 32-bit library is compiled, internal linkages always use 4 + bytes, so the configured value is not relevant. 
+ + The default value of 2 for the 8-bit and 16-bit libraries is sufficient + for all but the most massive patterns, since it allows the size of the + compiled pattern to be up to 65535 code units. Larger values allow + larger regular expressions to be compiled by those two libraries, but + at the expense of slower matching. + + PCRE2_CONFIG_MATCHLIMIT + + The output is a uint32_t integer that gives the default match limit for + pcre2_match(). Further details are given with pcre2_set_match_limit() + above. + + PCRE2_CONFIG_NEWLINE + + The output is a uint32_t integer whose value specifies the default + character sequence that is recognized as meaning "newline". The values + are: + + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) + + The default should normally correspond to the standard sequence for + your operating system. + + PCRE2_CONFIG_NEVER_BACKSLASH_C + + The output is a uint32_t integer that is set to one if the use of \C + was permanently disabled when PCRE2 was built; otherwise it is set to + zero. + + PCRE2_CONFIG_PARENSLIMIT + + The output is a uint32_t integer that gives the maximum depth of nest- + ing of parentheses (of any kind) in a pattern. This limit is imposed to + cap the amount of system stack used when a pattern is compiled. It is + specified when PCRE2 is built; the default is 250. This limit does not + take into account the stack that may already be used by the calling ap- + plication. For finer control over compilation stack usage, see + pcre2_set_compile_recursion_guard(). + + PCRE2_CONFIG_STACKRECURSE + + This parameter is obsolete and should not be used in new code. The out- + put is a uint32_t integer that is always set to zero. 
+ + PCRE2_CONFIG_TABLES_LENGTH + + The output is a uint32_t integer that gives the length of PCRE2's char- + acter processing tables in bytes. For details of these tables see the + section on locale support below. + + PCRE2_CONFIG_UNICODE_VERSION + + The where argument should point to a buffer that is at least 24 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) If PCRE2 has been compiled + without Unicode support, the buffer is filled with the text "Unicode + not supported". Otherwise, the Unicode version string (for example, + "8.0.0") is inserted. The number of code units used is returned. This + is the length of the string plus one unit for the terminating zero. + + PCRE2_CONFIG_UNICODE + + The output is a uint32_t integer that is set to one if Unicode support + is available; otherwise it is set to zero. Unicode support implies UTF + support. + + PCRE2_CONFIG_VERSION + + The where argument should point to a buffer that is at least 24 code + units long. (The exact length required can be found by calling + pcre2_config() with where set to NULL.) The buffer is filled with the + PCRE2 version string, zero-terminated. The number of code units used is + returned. This is the length of the string plus one unit for the termi- + nating zero. + + +COMPILING A PATTERN + + pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, + uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, + pcre2_compile_context *ccontext); + + void pcre2_code_free(pcre2_code *code); + + pcre2_code *pcre2_code_copy(const pcre2_code *code); + + pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code); + + The pcre2_compile() function compiles a pattern into an internal form. + The pattern is defined by a pointer to a string of code units and a + length in code units. If the pattern is zero-terminated, the length can + be specified as PCRE2_ZERO_TERMINATED. 
A NULL pattern pointer with a + length of zero is treated as an empty string (NULL with a non-zero + length causes an error return). The function returns a pointer to a + block of memory that contains the compiled pattern and related data, or + NULL if an error occurred. + + If the compile context argument ccontext is NULL, memory for the com- + piled pattern is obtained by calling malloc(). Otherwise, it is ob- + tained from the same memory function that was used for the compile con- + text. The caller must free the memory by calling pcre2_code_free() when + it is no longer needed. If pcre2_code_free() is called with a NULL ar- + gument, it returns immediately, without doing anything. + + The function pcre2_code_copy() makes a copy of the compiled code in new + memory, using the same memory allocator as was used for the original. + However, if the code has been processed by the JIT compiler (see be- + low), the JIT information cannot be copied (because it is position-de- + pendent). The new copy can initially be used only for non-JIT match- + ing, though it can be passed to pcre2_jit_compile() if required. If + pcre2_code_copy() is called with a NULL argument, it returns NULL. + + The pcre2_code_copy() function provides a way for individual threads in + a multithreaded application to acquire a private copy of shared com- + piled code. However, it does not make a copy of the character tables + used by the compiled pattern; the new pattern code points to the same + tables as the original code. (See "Locale Support" below for details + of these character tables.) In many applications the same tables are + used throughout, so this behaviour is appropriate. Nevertheless, there + are occasions when a copy of a compiled pattern and the relevant tables + are needed. The pcre2_code_copy_with_tables() provides this facility. + Copies of both the code and the tables are made, with the new code + pointing to the new tables. 
The memory for the new tables is automati- + cally freed when pcre2_code_free() is called for the new copy of the + compiled code. If pcre2_code_copy_with_tables() is called with a NULL + argument, it returns NULL. + + NOTE: When one of the matching functions is called, pointers to the + compiled pattern and the subject string are set in the match data block + so that they can be referenced by the substring extraction functions + after a successful match. After running a match, you must not free a + compiled pattern or a subject string until after all operations on the + match data block have taken place, unless, in the case of the subject + string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is + described in the section entitled "Option bits for pcre2_match()" be- + low. + + The options argument for pcre2_compile() contains various bit settings + that affect the compilation. It should be zero if none of them are re- + quired. The available options are described below. Some of them (in + particular, those that are compatible with Perl, but some others as + well) can also be set and unset from within the pattern (see the de- + tailed description in the pcre2pattern documentation). + + For those options that can be different in different parts of the pat- + tern, the contents of the options argument specify their settings at + the start of compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and + PCRE2_NO_UTF_CHECK options can be set at the time of matching as well + as at compile time. + + Some additional options and less frequently required compile-time para- + meters (for example, the newline setting) can be provided in a compile + context (as described above). + + If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme- + diately.
Otherwise, the variables to which these point are set to an + error code and an offset (number of code units) within the pattern, re- + spectively, when pcre2_compile() returns NULL because a compilation er- + ror has occurred. + + There are over 100 positive error codes that pcre2_compile() may return + if it finds an error in the pattern. There are also some negative error + codes that are used for invalid UTF strings when validity checking is + in force. These are the same as given by pcre2_match() and + pcre2_dfa_match(), and are described in the pcre2unicode documentation. + There is no separate documentation for the positive error codes, be- + cause the textual error messages that are obtained by calling the + pcre2_get_error_message() function (see "Obtaining a textual error mes- + sage" below) should be self-explanatory. Macro names starting with + PCRE2_ERROR_ are defined for both positive and negative error codes in + pcre2.h. When compilation is successful errorcode is set to a value + that returns the message "no error" if passed to pcre2_get_error_mes- + sage(). + + The value returned in erroroffset is an indication of where in the pat- + tern an error occurred. When there is no error, zero is returned. A + non-zero value is not necessarily the furthest point in the pattern + that was read. For example, after the error "lookbehind assertion is + not fixed length", the error offset points to the start of the failing + assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of + the first code unit of the failing character. + + Some errors are not detected until the whole pattern has been scanned; + in these cases, the offset passed back is the length of the pattern. + Note that the offset is in code units, not characters, even in a UTF + mode. It may sometimes point into the middle of a UTF-8 or UTF-16 char- + acter. 
+ + This code fragment shows a typical straightforward call to pcre2_com- + pile(): + + pcre2_code *re; + PCRE2_SIZE erroffset; + int errorcode; + re = pcre2_compile( + "^A.*Z", /* the pattern */ + PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ + 0, /* default options */ + &errorcode, /* for error code */ + &erroffset, /* for error offset */ + NULL); /* no compile context */ + + + Main compile options + + The following names for option bits are defined in the pcre2.h header + file: + + PCRE2_ANCHORED + + If this bit is set, the pattern is forced to be "anchored", that is, it + is constrained to match only at the first matching point in the string + that is being searched (the "subject string"). This effect can also be + achieved by appropriate constructs in the pattern itself, which is the + only way to do it in Perl. + + PCRE2_ALLOW_EMPTY_CLASS + + By default, for compatibility with Perl, a closing square bracket that + immediately follows an opening one is treated as a data character for + the class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the + class, which therefore contains no characters and so can never match. + + PCRE2_ALT_BSUX + + This option requests alternative handling of three escape sequences, + which makes PCRE2's behaviour more like ECMAscript (aka JavaScript). + When it is set: + + (1) \U matches an upper case "U" character; by default \U causes a com- + pile time error (Perl uses \U to upper case subsequent characters). + + (2) \u matches a lower case "u" character unless it is followed by four + hexadecimal digits, in which case the hexadecimal number defines the + code point to match. By default, \u causes a compile time error (Perl + uses it to upper case the following character). + + (3) \x matches a lower case "x" character unless it is followed by two + hexadecimal digits, in which case the hexadecimal number defines the + code point to match.
By default, as in Perl, a hexadecimal number is + always expected after \x, but it may have zero, one, or two digits (so, + for example, \xz matches a binary zero character followed by z). + + ECMAscript 6 added additional functionality to \u. This can be accessed + using the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile op- + tions" below). Note that this alternative escape handling applies only + to patterns. Neither of these options affects the processing of re- + placement strings passed to pcre2_substitute(). + + PCRE2_ALT_CIRCUMFLEX + + In multiline mode (when PCRE2_MULTILINE is set), the circumflex + metacharacter matches at the start of the subject (unless PCRE2_NOTBOL + is set), and also after any internal newline. However, it does not + match after a newline at the end of the subject, for compatibility with + Perl. If you want a multiline circumflex also to match after a termi- + nating newline, you must set PCRE2_ALT_CIRCUMFLEX. + + PCRE2_ALT_EXTENDED_CLASS + + Alters the parsing of character classes to follow the extended syntax + described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no + impact on the behaviour of the Perl-specific "(?[...])" syntax for ex- + tended classes, but instead enables the alternative syntax of extended + class behaviour inside ordinary "[...]" character classes. See the + pcre2pattern documentation for details of the character classes sup- + ported. + + PCRE2_ALT_VERBNAMES + + By default, for compatibility with Perl, the name in any verb sequence + such as (*MARK:NAME) is any sequence of characters that does not in- + clude a closing parenthesis. The name is not processed in any way, and + it is not possible to include a closing parenthesis in the name. How- + ever, if the PCRE2_ALT_VERBNAMES option is set, normal backslash pro- + cessing is applied to verb names and only an unescaped closing paren- + thesis terminates the name. 
A closing parenthesis can be included in a + name either as \) or between \Q and \E. If the PCRE2_EXTENDED or + PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped + whitespace in verb names is skipped and #-comments are recognized, ex- + actly as in the rest of the pattern. + + PCRE2_AUTO_CALLOUT + + If this bit is set, pcre2_compile() automatically inserts callout + items, all with number 255, before each pattern item, except immedi- + ately before or after an explicit callout in the pattern. For discus- + sion of the callout facility, see the pcre2callout documentation. + + PCRE2_CASELESS + + If this bit is set, letters in the pattern match both upper and lower + case letters in the subject. It is equivalent to Perl's /i option, and + it can be changed within a pattern by a (?i) option setting. If either + PCRE2_UTF or PCRE2_UCP is set, Unicode properties are used for all + characters with more than one other case, and for all characters whose + code points are greater than U+007F. + + Note that there are two ASCII characters, K and S, that, in addition to + their lower case ASCII equivalents, are case-equivalent with U+212A + (Kelvin sign) and U+017F (long S) respectively. If you do not want this + case equivalence, you can suppress it by setting PCRE2_EXTRA_CASE- + LESS_RESTRICT. + + One language family, Turkish and Azeri, has its own case-insensitivity + rules, which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. + This alters the behaviour of the 'i', 'I', U+0130 (capital I with dot + above), and U+0131 (small dotless i) characters. + + For lower valued characters with only one other case, a lookup table is + used for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup + table is used for all code points less than 256, and higher code points + (available only in 16-bit or 32-bit mode) are treated as not having an- + other case. 
+ + From release 10.45 PCRE2_CASELESS also affects what some of the letter- + related Unicode property escapes (\p and \P) match. The properties Lu + (upper case letter), Ll (lower case letter), and Lt (title case letter) + are all treated as LC (cased letter) when PCRE2_CASELESS is set. + + PCRE2_DOLLAR_ENDONLY + + If this bit is set, a dollar metacharacter in the pattern matches only + at the end of the subject string. Without this option, a dollar also + matches immediately before a newline at the end of the string (but not + before any other newlines). The PCRE2_DOLLAR_ENDONLY option is ignored + if PCRE2_MULTILINE is set. There is no equivalent to this option in + Perl, and no way to set it within a pattern. + + PCRE2_DOTALL + + If this bit is set, a dot metacharacter in the pattern matches any + character, including one that indicates a newline. However, it only + ever matches one character, even if newlines are coded as CRLF. Without + this option, a dot does not match when the current position in the sub- + ject is at a newline. This option is equivalent to Perl's /s option, + and it can be changed within a pattern by a (?s) option setting. A neg- + ative class such as [^a] always matches newline characters, and the \N + escape sequence always matches a non-newline character, independent of + the setting of PCRE2_DOTALL. + + PCRE2_DUPNAMES + + If this bit is set, names used to identify capture groups need not be + unique. This can be helpful for certain types of pattern when it is + known that only one instance of the named group can ever be matched. + There are more details of named capture groups below; see also the + pcre2pattern documentation. + + PCRE2_ENDANCHORED + + If this bit is set, the end of any pattern match must be right at the + end of the string being searched (the "subject string"). If the pattern + match succeeds by reaching (*ACCEPT), but does not reach the end of the + subject, the match fails at the current starting point. 
For unanchored + patterns, a new match is then tried at the next starting point. How- + ever, if the match succeeds by reaching the end of the pattern, but not + the end of the subject, backtracking occurs and an alternative match + may be found. Consider these two patterns: + + .(*ACCEPT)|.. + .|.. + + If matched against "abc" with PCRE2_ENDANCHORED set, the first matches + "c" whereas the second matches "bc". The effect of PCRE2_ENDANCHORED + can also be achieved by appropriate constructs in the pattern itself, + which is the only way to do it in Perl. + + For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only + to the first (that is, the longest) matched string. Other parallel + matches, which are necessarily substrings of the first one, must obvi- + ously end before the end of the subject. + + PCRE2_EXTENDED + + If this bit is set, most white space characters in the pattern are to- + tally ignored except when escaped, inside a character class, or inside + a \Q...\E sequence. However, white space is not allowed within se- + quences such as (?> that introduce various parenthesized groups, nor + within numerical quantifiers such as {1,3}. Ignorable white space is + permitted between an item and a following quantifier and between a + quantifier and a following + that indicates possessiveness. PCRE2_EX- + TENDED is equivalent to Perl's /x option, and it can be changed within + a pattern by a (?x) option setting. + + When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recog- + nizes as white space only those characters with code points less than + 256 that are flagged as white space in its low-character table. The ta- + ble is normally created by pcre2_maketables(), which uses the isspace() + function to identify space characters. In most ASCII environments, the + relevant characters are those with code points 0x0009 (tab), 0x000A + (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage + return), and 0x0020 (space). 
+ + When PCRE2 is compiled with Unicode support, in addition to these char- + acters, five more Unicode "Pattern White Space" characters are recog- + nized by PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to- + right mark), U+200F (right-to-left mark), U+2028 (line separator), and + U+2029 (paragraph separator). This set of characters is the same as + recognized by Perl's /x option. Note that the horizontal and vertical + space characters that are matched by the \h and \v escapes in patterns + are a much bigger set. + + As well as ignoring most white space, PCRE2_EXTENDED also causes char- + acters between an unescaped # outside a character class and the next + newline, inclusive, to be ignored, which makes it possible to include + comments inside complicated patterns. Note that the end of this type of + comment is a literal newline sequence in the pattern; escape sequences + that happen to represent a newline do not count. + + Which characters are interpreted as newlines can be specified by a set- + ting in the compile context that is passed to pcre2_compile() or by a + special sequence at the start of the pattern, as described in the sec- + tion entitled "Newline conventions" in the pcre2pattern documentation. + A default is defined when PCRE2 is built. + + PCRE2_EXTENDED_MORE + + This option has the effect of PCRE2_EXTENDED, but, in addition, un- + escaped space and horizontal tab characters are ignored inside a char- + acter class. Note: only these two characters are ignored, not the full + set of pattern white space characters that are ignored outside a char- + acter class. PCRE2_EXTENDED_MORE is equivalent to Perl's /xx option, + and it can be changed within a pattern by a (?xx) option setting. + + PCRE2_FIRSTLINE + + If this option is set, the start of an unanchored pattern match must be + before or at the first newline in the subject string following the + start of matching, though the matched text may continue over the new- + line. 
If startoffset is non-zero, the limiting newline is not necessar- + ily the first newline in the subject. For example, if the subject + string is "abc\nxyz" (where \n represents a single-character newline) a + pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset is + greater than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more + general limiting facility. If PCRE2_FIRSTLINE is set with an offset + limit, a match must occur in the first line and also within the offset + limit. In other words, whichever limit comes first is used. This option + has no effect for anchored patterns. + + PCRE2_LITERAL + + If this option is set, all meta-characters in the pattern are disabled, + and it is treated as a literal string. Matching literal strings with a + regular expression engine is not the most efficient way of doing it. If + you are doing a lot of literal matching and are worried about effi- + ciency, you should consider using other approaches. The only other main + options that are allowed with PCRE2_LITERAL are: PCRE2_ANCHORED, + PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, PCRE2_CASELESS, PCRE2_FIRSTLINE, + PCRE2_MATCH_INVALID_UTF, PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, + PCRE2_UTF, and PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EX- + TRA_MATCH_LINE and PCRE2_EXTRA_MATCH_WORD are also supported. Any other + options cause an error. + + PCRE2_MATCH_INVALID_UTF + + This option forces PCRE2_UTF (see below) and also enables support for + matching by pcre2_match() in subject strings that contain invalid UTF + sequences. Note, however, that the 16-bit and 32-bit PCRE2 libraries + process strings as sequences of uint16_t or uint32_t code points. They + cannot find valid UTF sequences within an arbitrary string of bytes un- + less such sequences are suitably aligned. This facility is not sup- + ported for DFA matching. For details, see the pcre2unicode documenta- + tion. 
+ + PCRE2_MATCH_UNSET_BACKREF + + If this option is set, a backreference to an unset capture group + matches an empty string (by default this causes the current matching + alternative to fail). A pattern such as (\1)(a) succeeds when this op- + tion is set (assuming it can find an "a" in the subject), whereas it + fails by default, for Perl compatibility. Setting this option makes + PCRE2 behave more like ECMAscript (aka JavaScript). + + PCRE2_MULTILINE + + By default, for the purposes of matching "start of line" and "end of + line", PCRE2 treats the subject string as consisting of a single line + of characters, even if it actually contains newlines. The "start of + line" metacharacter (^) matches only at the start of the string, and + the "end of line" metacharacter ($) matches only at the end of the + string, or before a terminating newline (except when PCRE2_DOLLAR_EN- + DONLY is set). Note, however, that unless PCRE2_DOTALL is set, the "any + character" metacharacter (.) does not match at a newline. This behav- + iour (for ^, $, and dot) is the same as Perl. + + When PCRE2_MULTILINE is set, the "start of line" and "end of line" + constructs match immediately following or immediately before internal + newlines in the subject string, respectively, as well as at the very + start and end. This is equivalent to Perl's /m option, and it can be + changed within a pattern by a (?m) option setting. Note that the "start + of line" metacharacter does not match after a newline at the end of the + subject, for compatibility with Perl. However, you can change this by + setting the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a + subject string, or no occurrences of ^ or $ in a pattern, setting + PCRE2_MULTILINE has no effect. + + PCRE2_NEVER_BACKSLASH_C + + This option locks out the use of \C in the pattern that is being com- + piled. 
This escape can cause unpredictable behaviour in UTF-8 or + UTF-16 modes, because it may leave the current matching point in the + middle of a multi-code-unit character. This option may be useful in ap- + plications that process patterns from external sources. Note that there + is also a build-time option that permanently locks out the use of \C. + + PCRE2_NEVER_UCP + + This option locks out the use of Unicode properties for handling \B, + \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as + described for the PCRE2_UCP option below. In particular, it prevents + the creator of the pattern from enabling this facility by starting the + pattern with (*UCP). This option may be useful in applications that + process patterns from external sources. The option combination + PCRE2_UCP and PCRE2_NEVER_UCP causes an error. + + PCRE2_NEVER_UTF + + This option locks out interpretation of the pattern as UTF-8, UTF-16, + or UTF-32, depending on which library is in use. In particular, it pre- + vents the creator of the pattern from switching to UTF interpretation + by starting the pattern with (*UTF). This option may be useful in ap- + plications that process patterns from external sources. The combination + of PCRE2_UTF and PCRE2_NEVER_UTF causes an error. + + PCRE2_NO_AUTO_CAPTURE + + If this option is set, it disables the use of numbered capturing paren- + theses in the pattern. Any opening parenthesis that is not followed by + ? behaves as if it were followed by ?: but named parentheses can still + be used for capturing (and they acquire numbers in the usual way). This + is the same as Perl's /n option. Note that, when this option is set, + references to capture groups (backreferences or recursion/subroutine + calls) may only refer to named groups, though the reference can be by + name or by number. 
+ + PCRE2_NO_AUTO_POSSESS + + If this (deprecated) option is set, it disables "auto-possessifica- + tion", which is an optimization that, for example, turns a+b into a++b + in order to avoid backtracks into a+ that can never be successful. How- + ever, if callouts are in use, auto-possessification means that some + callouts are never taken. You can set this option if you want the + matching functions to do a full unoptimized search and run all the + callouts, but it is mainly provided for testing purposes. + + If a compile context is available, it is recommended to use + pcre2_set_optimize() with the directive PCRE2_AUTO_POSSESS_OFF rather + than the compile option PCRE2_NO_AUTO_POSSESS. Note that + PCRE2_NO_AUTO_POSSESS takes precedence over the pcre2_set_optimize() + optimization directives PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF. + + PCRE2_NO_DOTSTAR_ANCHOR + + If this (deprecated) option is set, it disables an optimization that is + applied when .* is the first significant item in a top-level branch of + a pattern, and all the other branches also start with .* or with \A or + \G or ^. The optimization is automatically disabled for .* if it is in- + side an atomic group or a capture group that is the subject of a back- + reference, or if the pattern contains (*PRUNE) or (*SKIP). When the op- + timization is not disabled, such a pattern is automatically anchored if + PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set + for any ^ items. Otherwise, the fact that any match must start either + at the start of the subject or following a newline is remembered. Like + other optimizations, this can cause callouts to be skipped. (If a com- + pile context is available, it is recommended to use pcre2_set_opti- + mize() with the directive PCRE2_DOTSTAR_ANCHOR_OFF instead.) + + PCRE2_NO_START_OPTIMIZE + + This is an option whose main effect is at matching time. 
It does not + change what pcre2_compile() generates, but it does affect the output of + the JIT compiler. Setting this option is equivalent to calling + pcre2_set_optimize() with the directive parameter set to + PCRE2_START_OPTIMIZE_OFF. + + There are a number of optimizations that may occur at the start of a + match, in order to speed up the process. For example, if it is known + that an unanchored match must start with a specific code unit value, + the matching code searches the subject for that value, and fails imme- + diately if it cannot find it, without actually running the main match- + ing function. The start-up optimizations are in effect a pre-scan of + the subject that takes place before the pattern is run. + + Disabling the start-up optimizations may cause performance to suffer. + However, this may be desirable for patterns which contain callouts or + items such as (*COMMIT) and (*MARK). See the above description of + PCRE2_START_OPTIMIZE_OFF for further details. + + PCRE2_NO_UTF_CHECK + + When PCRE2_UTF is set, the validity of the pattern as a UTF string is + automatically checked. There are discussions about the validity of + UTF-8 strings, UTF-16 strings, and UTF-32 strings in the pcre2unicode + document. If an invalid UTF sequence is found, pcre2_compile() returns + a negative error code. + + If you know that your pattern is a valid UTF string, and you want to + skip this check for performance reasons, you can set the + PCRE2_NO_UTF_CHECK option. When it is set, the effect of passing an in- + valid UTF string as a pattern is undefined. It may cause your program + to crash or loop. + + Note that this option can also be passed to pcre2_match() and + pcre2_dfa_match(), to suppress UTF validity checking of the subject + string. + + Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis- + able the error that is given if an escape sequence for an invalid Uni- + code code point is encountered in the pattern. 
In particular, the so- + called "surrogate" code points (0xd800 to 0xdfff) are invalid. If you + want to allow escape sequences such as \x{d800} you can set the + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option, as described in the + section entitled "Extra compile options" below. However, this is pos- + sible only in UTF-8 and UTF-32 modes, because these values are not rep- + resentable in UTF-16. + + PCRE2_UCP + + This option has two effects. Firstly, it changes the way PCRE2 processes + \B, \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character + classes. By default, only ASCII characters are recognized, but if + PCRE2_UCP is set, Unicode properties are used to classify characters. + There are some PCRE2_EXTRA options (see below) that add finer control + to this behaviour. More details are given in the section on generic + character types in the pcre2pattern page. + + The second effect of PCRE2_UCP is to force the use of Unicode proper- + ties for upper/lower casing operations, even when PCRE2_UTF is not set. + This makes it possible to process strings in the 16-bit UCS-2 code. + This option is available only if PCRE2 has been compiled with Unicode + support (which is the default). + + The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless + matching such that ASCII characters match only ASCII characters and + non-ASCII characters match only non-ASCII characters. The PCRE2_EX- + TRA_TURKISH_CASING option (see above) alters the matching of the 'i' + characters to follow their behaviour in Turkish and Azeri languages. + For further details on PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EX- + TRA_TURKISH_CASING, see the pcre2unicode page. + + PCRE2_UNGREEDY + + This option inverts the "greediness" of the quantifiers so that they + are not greedy by default, but become greedy if followed by "?". It is + not compatible with Perl. It can also be set by a (?U) option setting + within the pattern. 
+ + PCRE2_USE_OFFSET_LIMIT + + This option must be set for pcre2_compile() if pcre2_set_offset_limit() + is going to be used to set a non-default offset limit in a match con- + text for matches that use this pattern. An error is generated if an + offset limit is set without this option. For more details, see the de- + scription of pcre2_set_offset_limit() in the section that describes + match contexts. See also the PCRE2_FIRSTLINE option above. + + PCRE2_UTF + + This option causes PCRE2 to regard both the pattern and the subject + strings that are subsequently processed as strings of UTF characters + instead of single-code-unit strings. It is available when PCRE2 is + built to include Unicode support (which is the default). If Unicode + support is not available, the use of this option provokes an error. De- + tails of how PCRE2_UTF changes the behaviour of PCRE2 are given in the + pcre2unicode page. In particular, note that it changes the way + PCRE2_CASELESS works. + + Extra compile options + + The option bits that can be set in a compile context by calling the + pcre2_set_compile_extra_options() function are as follows: + + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK + + Since release 10.38 PCRE2 has forbidden the use of \K within lookaround + assertions, following Perl's lead. This option is provided to re-enable + the previous behaviour (act in positive lookarounds, ignore in negative + ones) in case anybody is relying on it. + + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + + This option applies when compiling a pattern in UTF-8 or UTF-32 mode. + It is forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode + "surrogate" code points in the range 0xd800 to 0xdfff are used in pairs + in UTF-16 to encode code points with values in the range 0x10000 to + 0x10ffff. The surrogates cannot therefore be represented in UTF-16. 
+ They can be represented in UTF-8 and UTF-32, but are defined as invalid + code points, and cause errors if encountered in a UTF-8 or UTF-32 + string that is being checked for validity by PCRE2. + + These values also cause errors if encountered in escape sequences such + as \x{d912} within a pattern. However, it seems that some applications, + when using PCRE2 to check for unwanted characters in UTF-8 strings, ex- + plicitly test for the surrogates using escape sequences. The + PCRE2_NO_UTF_CHECK option does not disable the error that occurs, be- + cause it applies only to the testing of input strings for UTF validity. + + If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surro- + gate code point values in UTF-8 and UTF-32 patterns no longer provoke + errors and are incorporated in the compiled pattern. However, they can + only match subject characters if the matching function is called with + PCRE2_NO_UTF_CHECK set. + + PCRE2_EXTRA_ALT_BSUX + + The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and + \x in the way that ECMAscript (aka JavaScript) does. Additional func- + tionality was defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has + the effect of PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} + as a hexadecimal character code, where hhh.. is any number of hexadeci- + mal digits. + + PCRE2_EXTRA_ASCII_BSD + + This option forces \d to match only ASCII digits, even when PCRE2_UCP + is set. It can be changed within a pattern by means of the (?aD) op- + tion setting. + + PCRE2_EXTRA_ASCII_BSS + + This option forces \s to match only ASCII space characters, even when + PCRE2_UCP is set. It can be changed within a pattern by means of the + (?aS) option setting. + + PCRE2_EXTRA_ASCII_BSW + + This option forces \w to match only ASCII word characters, even when + PCRE2_UCP is set. It can be changed within a pattern by means of the + (?aW) option setting. 
+ + PCRE2_EXTRA_ASCII_DIGIT + + This option forces the POSIX character classes [:digit:] and [:xdigit:] + to match only ASCII digits, even when PCRE2_UCP is set. It can be + changed within a pattern by means of the (?aT) option setting. + + PCRE2_EXTRA_ASCII_POSIX + + This option forces all the POSIX character classes, including [:digit:] + and [:xdigit:], to match only ASCII characters, even when PCRE2_UCP is + set. It can be changed within a pattern by means of the (?aP) option + setting, but note that this also sets PCRE2_EXTRA_ASCII_DIGIT in order + to ensure that (?-aP) unsets all ASCII restrictions for POSIX classes. + + PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + + This is a dangerous option. Use with care. By default, an unrecognized + escape such as \j or a malformed one such as \x{2z} causes a compile- + time error when detected by pcre2_compile(). Perl is somewhat inconsis- + tent in handling such items: for example, \j is treated as a literal + "j", and non-hexadecimal digits in \x{} are just ignored, though warn- + ings are given in both cases if Perl's warning switch is enabled. How- + ever, a malformed octal number after \o{ always causes an error in + Perl. + + If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to + pcre2_compile(), all unrecognized or malformed escape sequences are + treated as single-character escapes. For example, \j is a literal "j" + and \x{2z} is treated as the literal string "x{2z}". Setting this op- + tion means that typos in patterns may go undetected and have unexpected + results. Also note that a sequence such as [\N{] is interpreted as a + malformed attempt at [\N{...}] and so is treated as [N{] whereas [\N] + gives an error because an unqualified \N is a valid escape sequence but + is not supported in a character class. To reiterate: this is a danger- + ous option. Use with great care. 
+ + PCRE2_EXTRA_CASELESS_RESTRICT + + When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows + Unicode rules, which allow for more than two cases per character. There + are two case-equivalent character sets that contain both ASCII and non- + ASCII characters. The ASCII letter S is case-equivalent to U+017f (long + S) and the ASCII letter K is case-equivalent to U+212a (Kelvin sign). + This option disables recognition of case-equivalences that cross the + ASCII/non-ASCII boundary. In a caseless match, both characters must ei- + ther be ASCII or non-ASCII. The option can be changed within a pattern + by the (*CASELESS_RESTRICT) or (?r) option settings. + + PCRE2_EXTRA_ESCAPED_CR_IS_LF + + There are some legacy applications where the escape sequence \r in a + pattern is expected to match a newline. If this option is set, \r in a + pattern is converted to \n so that it matches a LF (linefeed) instead + of a CR (carriage return) character. The option does not affect a lit- + eral CR in the pattern, nor does it affect CR specified as an explicit + code point such as \x{0D}. + + PCRE2_EXTRA_MATCH_LINE + + This option is provided for use by the -x option of pcre2grep. It + causes the pattern only to match complete lines. This is achieved by + automatically inserting the code for "^(?:" at the start of the com- + piled pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, + the matched line may be in the middle of the subject string. This op- + tion can be used with PCRE2_LITERAL. + + PCRE2_EXTRA_MATCH_WORD + + This option is provided for use by the -w option of pcre2grep. It + causes the pattern only to match strings that have a word boundary at + the start and the end. This is achieved by automatically inserting the + code for "\b(?:" at the start of the compiled pattern and ")\b" at the + end. The option may be used with PCRE2_LITERAL. However, it is ignored + if PCRE2_EXTRA_MATCH_LINE is also set. 
+ + PCRE2_EXTRA_NO_BS0 + + If this option is set (note that its final character is the digit 0) it + locks out the use of the sequence \0 unless at least one more octal + digit follows. + + PCRE2_EXTRA_PYTHON_OCTAL + + If this option is set, PCRE2 follows Python's rules for interpreting + octal escape sequences. The rules for handling sequences such as \14, + which could be an octal number or a back reference are different. De- + tails are given in the pcre2pattern documentation. + + PCRE2_EXTRA_NEVER_CALLOUT + + If this option is set, PCRE2 treats callouts in the pattern as a syntax + error, returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if + the application knows that a callout will not be provided to + pcre2_match(), so that callouts in the pattern are not silently ig- + nored. + + PCRE2_EXTRA_TURKISH_CASING + + This option alters case-equivalence of the 'i' letters to follow the + alphabet used by Turkish and Azeri languages. The option can be changed + within a pattern by the (*TURKISH_CASING) start-of-pattern setting. Ei- + ther the UTF or UCP options must be set. In the 8-bit library, UTF must + be set. This option cannot be combined with PCRE2_EXTRA_CASELESS_RE- + STRICT. 
+ + +JUST-IN-TIME (JIT) COMPILATION + + int pcre2_jit_compile(pcre2_code *code, uint32_t options); + + int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); + + pcre2_jit_stack *pcre2_jit_stack_create(size_t startsize, + size_t maxsize, pcre2_general_context *gcontext); + + void pcre2_jit_stack_assign(pcre2_match_context *mcontext, + pcre2_jit_callback callback_function, void *callback_data); + + void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack); + + These functions provide support for JIT compilation, which, if the + just-in-time compiler is available, further processes a compiled pat- + tern into machine code that executes much faster than the pcre2_match() + interpretive matching function. Full details are given in the pcre2jit + documentation. + + JIT compilation is a heavyweight optimization. It can take some time + for patterns to be analyzed, and for one-off matches and simple pat- + terns the benefit of faster execution might be offset by a much slower + compilation time. Most (but not all) patterns can be optimized by the + JIT compiler. + + +LOCALE SUPPORT + + const uint8_t *pcre2_maketables(pcre2_general_context *gcontext); + + void pcre2_maketables_free(pcre2_general_context *gcontext, + const uint8_t *tables); + + PCRE2 handles caseless matching, and determines whether characters are + letters, digits, or whatever, by reference to a set of tables, indexed + by character code point. However, this applies only to characters whose + code points are less than 256. By default, higher-valued code points + never match escapes such as \w or \d. 
+ + When PCRE2 is built with Unicode support (the default), certain Unicode + character properties can be tested with \p and \P, or, alternatively, + the PCRE2_UCP option can be set when a pattern is compiled; this causes + \w and friends to use Unicode property support instead of the built-in + tables. PCRE2_UCP also causes upper/lower casing operations on charac- + ters with code points greater than 127 to use Unicode properties. These + effects apply even when PCRE2_UTF is not set. There are, however, some + PCRE2_EXTRA options (see above) that can be used to modify or suppress + them. + + The use of locales with Unicode is discouraged. If you are handling + characters with code points greater than 127, you should either use + Unicode support, or use locales, but not try to mix the two. + + PCRE2 contains a built-in set of character tables that are used by de- + fault. These are sufficient for many applications. Normally, the in- + ternal tables recognize only ASCII characters. However, when PCRE2 is + built, it is possible to cause the internal tables to be rebuilt in the + default "C" locale of the local system, which may cause them to be dif- + ferent. + + The built-in tables can be overridden by tables supplied by the appli- + cation that calls PCRE2. These may be created in a different locale + from the default. As more and more applications change to using Uni- + code, the need for this locale support is expected to die away. + + External tables are built by calling the pcre2_maketables() function, + in the relevant locale. The only argument to this function is a general + context, which can be used to pass a custom memory allocator. If the + argument is NULL, the system malloc() is used. The result can be passed + to pcre2_compile() as often as necessary, by creating a compile context + and calling pcre2_set_character_tables() to set the tables pointer + therein. 
+ + For example, to build and use tables that are appropriate for the + French locale (where accented characters with values greater than 127 + are treated as letters), the following code could be used: + + setlocale(LC_CTYPE, "fr_FR"); + tables = pcre2_maketables(NULL); + ccontext = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(ccontext, tables); + re = pcre2_compile(..., ccontext); + + The locale name "fr_FR" is used on Linux and other Unix-like systems; + if you are using Windows, the name for the French locale is "french". + + The pointer that is passed (via the compile context) to pcre2_compile() + is saved with the compiled pattern, and the same tables are used by the + matching functions. Thus, for any single pattern, compilation and + matching both happen in the same locale, but different patterns can be + processed in different locales. + + It is the caller's responsibility to ensure that the memory containing + the tables remains available while they are still in use. When they are + no longer needed, you can discard them using pcre2_maketables_free(), + which should be passed as its first parameter the same general context that + was used to create the tables. + + Saving locale tables + + The tables described above are just a sequence of binary bytes, which + makes them independent of hardware characteristics such as endianness + or whether the processor is 32-bit or 64-bit. A copy of the result of + pcre2_maketables() can therefore be saved in a file or elsewhere and + re-used later, even in a different program or on another computer. The + size of the tables (number of bytes) must be obtained by calling + pcre2_config() with the PCRE2_CONFIG_TABLES_LENGTH option because + pcre2_maketables() does not return this value. Note that the + pcre2_dftables program, which is part of the PCRE2 build system, can be + used stand-alone to create a file that contains a set of binary tables. + See the pcre2build documentation for details. 
+ + +INFORMATION ABOUT A COMPILED PATTERN + + int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where); + + The pcre2_pattern_info() function returns general information about a + compiled pattern. For information about callouts, see the next section. + The first argument for pcre2_pattern_info() is a pointer to the com- + piled pattern. The second argument specifies which piece of information + is required, and the third argument is a pointer to a variable to re- + ceive the data. If the third argument is NULL, the first argument is + ignored, and the function returns the size in bytes of the variable + that is required for the information requested. Otherwise, the yield of + the function is zero for success, or one of the following negative num- + bers: + + PCRE2_ERROR_NULL the argument code was NULL + PCRE2_ERROR_BADMAGIC the "magic number" was not found + PCRE2_ERROR_BADOPTION the value of what was invalid + PCRE2_ERROR_UNSET the requested field is not set + + The "magic number" is placed at the start of each compiled pattern as a + simple check against passing an arbitrary memory pointer. Here is a + typical call of pcre2_pattern_info(), to obtain the length of the com- + piled pattern: + + int rc; + size_t length; + rc = pcre2_pattern_info( + re, /* result of pcre2_compile() */ + PCRE2_INFO_SIZE, /* what is required */ + &length); /* where to put the data */ + + The possible values for the second argument are defined in pcre2.h, and + are as follows: + + PCRE2_INFO_ALLOPTIONS + PCRE2_INFO_ARGOPTIONS + PCRE2_INFO_EXTRAOPTIONS + + Return copies of the pattern's options. The third argument should point + to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the op- + tions that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOP- + TIONS returns the compile options as modified by any top-level (*XXX) + option settings such as (*UTF) at the start of the pattern itself. 
+ PCRE2_INFO_EXTRAOPTIONS returns the extra options that were set in the + compile context by calling the pcre2_set_compile_extra_options() func- + tion. + + For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EX- + TENDED option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED + and PCRE2_UTF. Option settings such as (?i) that can change within a + pattern do not affect the result of PCRE2_INFO_ALLOPTIONS, even if they + appear right at the start of the pattern. (This was different in some + earlier releases.) + + A pattern compiled without PCRE2_ANCHORED is automatically anchored by + PCRE2 if the first significant item in every top-level branch is one of + the following: + + ^ unless PCRE2_MULTILINE is set + \A always + \G always + .* sometimes - see below + + When .* is the first significant item, anchoring is possible only when + all the following are true: + + .* is not in an atomic group + .* is not in a capture group that is the subject + of a backreference + PCRE2_DOTALL is in force for .* + Neither (*PRUNE) nor (*SKIP) appears in the pattern + PCRE2_NO_DOTSTAR_ANCHOR is not set + Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF + + For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in + the options returned for PCRE2_INFO_ALLOPTIONS. + + PCRE2_INFO_BACKREFMAX + + Return the number of the highest backreference in the pattern. The + third argument should point to a uint32_t variable. Named capture + groups acquire numbers as well as names, and these count towards the + highest backreference. Backreferences such as \4 or \g{12} match the + captured characters of the given group, but in addition, the check that + a capture group is set in a conditional group such as (?(3)a|b) is also + a backreference. Zero is returned if there are no backreferences. + + PCRE2_INFO_BSR + + The output is a uint32_t integer whose value indicates what character + sequences the \R escape sequence matches. 
A value of PCRE2_BSR_UNICODE + means that \R matches any Unicode line ending sequence; a value of + PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. + + PCRE2_INFO_CAPTURECOUNT + + Return the highest capture group number in the pattern. In patterns + where (?| is not used, this is also the total number of capture groups. + The third argument should point to a uint32_t variable. + + PCRE2_INFO_DEPTHLIMIT + + If the pattern set a backtracking depth limit by including an item of + the form (*LIMIT_DEPTH=nnnn) at the start, the value is returned. The + third argument should point to a uint32_t integer. If no such value has + been set, the call to pcre2_pattern_info() returns the error PCRE2_ER- + ROR_UNSET. Note that this limit will only be used during matching if it + is less than the limit set or defaulted by the caller of the match + function. + + PCRE2_INFO_FIRSTBITMAP + + In the absence of a single first code unit for a non-anchored pattern, + pcre2_compile() may construct a 256-bit table that defines a fixed set + of values for the first code unit in any match. For example, a pattern + that starts with [abc] results in a table with three bits set. When + code unit values greater than 255 are supported, the flag bit for 255 + means "any code unit of value 255 or above". If such a table was con- + structed, a pointer to it is returned. Otherwise NULL is returned. The + third argument should point to a const uint8_t * variable. + + PCRE2_INFO_FIRSTCODETYPE + + Return information about the first code unit of any matched string, for + a non-anchored pattern. The third argument should point to a uint32_t + variable. If there is a fixed first value, for example, the letter "c" + from a pattern such as (cat|cow|coyote), 1 is returned, and the value + can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. 
If there is no fixed + first value, but it is known that a match can occur only at the start + of the subject or following a newline in the subject, 2 is returned. + Otherwise, and for anchored patterns, 0 is returned. + + PCRE2_INFO_FIRSTCODEUNIT + + Return the value of the first code unit of any matched string for a + pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. + The third argument should point to a uint32_t variable. In the 8-bit + library, the value is always less than 256. In the 16-bit library the + value can be up to 0xffff. In the 32-bit library in UTF-32 mode the + value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 + mode. + + PCRE2_INFO_FRAMESIZE + + Return the size (in bytes) of the data frames that are used to remember + backtracking positions when the pattern is processed by pcre2_match() + without the use of JIT. The third argument should point to a size_t + variable. The frame size depends on the number of capturing parentheses + in the pattern. Each additional capture group adds two PCRE2_SIZE vari- + ables. + + PCRE2_INFO_HASBACKSLASHC + + Return 1 if the pattern contains any instances of \C, otherwise 0. The + third argument should point to a uint32_t variable. + + PCRE2_INFO_HASCRORLF + + Return 1 if the pattern contains any explicit matches for CR or LF + characters, otherwise 0. The third argument should point to a uint32_t + variable. An explicit match is either a literal CR or LF character, or + \r or \n or one of the equivalent hexadecimal or octal escape se- + quences. + + PCRE2_INFO_HEAPLIMIT + + If the pattern set a heap memory limit by including an item of the form + (*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu- + ment should point to a uint32_t integer. If no such value has been set, + the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UNSET. 
+ Note that this limit will only be used during matching if it is less + than the limit set or defaulted by the caller of the match function. + + PCRE2_INFO_JCHANGED + + Return 1 if the (?J) or (?-J) option setting is used in the pattern, + otherwise 0. The third argument should point to a uint32_t variable. + (?J) and (?-J) set and unset the local PCRE2_DUPNAMES option, respec- + tively. + + PCRE2_INFO_JITSIZE + + If the compiled pattern was successfully processed by pcre2_jit_com- + pile(), return the size of the JIT compiled code, otherwise return + zero. The third argument should point to a size_t variable. + + PCRE2_INFO_LASTCODETYPE + + Returns 1 if there is a rightmost literal code unit that must exist in + any matched string, other than at its start. The third argument should + point to a uint32_t variable. If there is no such value, 0 is returned. + When 1 is returned, the code unit value itself can be retrieved using + PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is + recorded only if it follows something of variable length. For example, + for the pattern /^a\d+z\d+/ the returned value is 1 (with "z" returned + from PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is + 0. + + PCRE2_INFO_LASTCODEUNIT + + Return the value of the rightmost literal code unit that must exist in + any matched string, other than at its start, for a pattern where + PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argu- + ment should point to a uint32_t variable. + + PCRE2_INFO_MATCHEMPTY + + Return 1 if the pattern might match an empty string, otherwise 0. The + third argument should point to a uint32_t variable. When a pattern con- + tains recursive subroutine calls it is not always possible to determine + whether or not it can match an empty string. PCRE2 takes a cautious ap- + proach and returns 1 in such cases. 
+ + PCRE2_INFO_MATCHLIMIT + + If the pattern set a match limit by including an item of the form + (*LIMIT_MATCH=nnnn) at the start, the value is returned. The third ar- + gument should point to a uint32_t integer. If no such value has been + set, the call to pcre2_pattern_info() returns the error PCRE2_ERROR_UN- + SET. Note that this limit will only be used during matching if it is + less than the limit set or defaulted by the caller of the match func- + tion. + + PCRE2_INFO_MAXLOOKBEHIND + + A lookbehind assertion moves back a certain number of characters (not + code units) when it starts to process each of its branches. This re- + quest returns the largest of these backward moves. The third argument + should point to a uint32_t integer. The simple assertions \b and \B re- + quire a one-character lookbehind and cause PCRE2_INFO_MAXLOOKBEHIND to + return 1 in the absence of anything longer. \A also registers a one- + character lookbehind, though it does not actually inspect the previous + character. + + Note that this information is useful for multi-segment matching only if + the pattern contains no nested lookbehinds. For example, the pattern + (?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is + processed, the first lookbehind moves back by two characters, matches + one character, then the nested lookbehind also moves back by two char- + acters. This puts the matching point three characters earlier than it + was at the start. PCRE2_INFO_MAXLOOKBEHIND is really only useful as a + debugging tool. See the pcre2partial documentation for a discussion of + multi-segment matching. + + PCRE2_INFO_MINLENGTH + + If a minimum length for matching subject strings was computed, its + value is returned. Otherwise the returned value is 0. This value is not + computed when PCRE2_NO_START_OPTIMIZE is set. The value is a number of + characters, which in UTF mode may be different from the number of code + units. The third argument should point to a uint32_t variable. 
The + value is a lower bound to the length of any matching string. There may + not be any strings of that length that do actually match, but every + string that does match is at least that long. + + PCRE2_INFO_NAMECOUNT + PCRE2_INFO_NAMEENTRYSIZE + PCRE2_INFO_NAMETABLE + + PCRE2 supports the use of named as well as numbered capturing parenthe- + ses. The names are just an additional way of identifying the parenthe- + ses, which still acquire numbers. Several convenience functions such as + pcre2_substring_get_byname() are provided for extracting captured sub- + strings by name. It is also possible to extract the data directly, by + first converting the name to a number in order to access the correct + pointers in the output vector (described with pcre2_match() below). To + do the conversion, you need to use the name-to-number map, which is de- + scribed by these three values. + + The map consists of a number of fixed-size entries. PCRE2_INFO_NAME- + COUNT gives the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives + the size of each entry in code units; both of these return a uint32_t + value. The entry size depends on the length of the longest name. + + PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table. + This is a PCRE2_SPTR pointer to a block of code units. In the 8-bit li- + brary, the first two bytes of each entry are the number of the captur- + ing parenthesis, most significant byte first. In the 16-bit library, + the pointer points to 16-bit code units, the first of which contains + the parenthesis number. In the 32-bit library, the pointer points to + 32-bit code units, the first of which contains the parenthesis number. + The rest of the entry is the corresponding name, zero terminated. + + The names are in alphabetical order. 
If (?| is used to create multiple + capture groups with the same number, as described in the section on du- + plicate group numbers in the pcre2pattern page, the groups may be given + the same name, but there is only one entry in the table. Different + names for groups of the same number are not permitted. + + Duplicate names for capture groups with different numbers are permit- + ted, but only if PCRE2_DUPNAMES is set. They appear in the table in the + order in which they were found in the pattern. In the absence of (?| + this is the order of increasing number; when (?| is used this is not + necessarily the case because later capture groups may have lower num- + bers. + + As a simple example of the name/number table, consider the following + pattern after compilation by the 8-bit library (assume PCRE2_EXTENDED + is set, so white space - including newlines - is ignored): + + (?<date> (?<year>(\d\d)?\d\d) - + (?<month>\d\d) - (?<day>\d\d) ) + + There are four named capture groups, so the table has four entries, and + each entry in the table is eight bytes long. The table is as follows, + with non-printing bytes shown in hexadecimal, and undefined bytes shown + as ??: + + 00 01 d a t e 00 ?? + 00 05 d a y 00 ?? ?? + 00 04 m o n t h 00 + 00 02 y e a r 00 ?? + + When writing code to extract data from named capture groups using the + name-to-number map, remember that the length of the entries is likely + to be different for each compiled pattern. + + PCRE2_INFO_NEWLINE + + The output is one of the following uint32_t values: + + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) + + This identifies the character sequence that will be recognized as mean- + ing "newline" while matching.
+ + PCRE2_INFO_SIZE + + Return the size of the compiled pattern in bytes (for all three li- + braries). The third argument should point to a size_t variable. This + value includes the size of the general data block that precedes the + code units of the compiled pattern itself. The value that is used when + pcre2_compile() is getting memory in which to place the compiled pat- + tern may be slightly larger than the value returned by this option, be- + cause there are cases where the code that calculates the size has to + over-estimate. Processing a pattern with the JIT compiler does not al- + ter the value returned by this option. + + +INFORMATION ABOUT A PATTERN'S CALLOUTS + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + A script language that supports the use of string arguments in callouts + might like to scan all the callouts in a pattern before running the + match. This can be done by calling pcre2_callout_enumerate(). The first + argument is a pointer to a compiled pattern, the second points to a + callback function, and the third is arbitrary user data. The callback + function is called for every callout in the pattern in the order in + which they appear. Its first argument is a pointer to a callout enumer- + ation block, and its second argument is the user_data value that was + passed to pcre2_callout_enumerate(). The contents of the callout enu- + meration block are described in the pcre2callout documentation, which + also gives further details about callouts. + + +SERIALIZATION AND PRECOMPILING + + It is possible to save compiled patterns on disc or elsewhere, and re- + load them later, subject to a number of restrictions. The host on which + the patterns are reloaded must be running the same version of PCRE2, + with the same code unit width, and must also have the same endianness, + pointer width, and PCRE2_SIZE type. 
Before compiled patterns can be + saved, they must be converted to a "serialized" form, which in the case + of PCRE2 is really just a bytecode dump. The functions whose names be- + gin with pcre2_serialize_ are used for converting to and from the seri- + alized form. They are described in the pcre2serialize documentation. + Note that PCRE2 serialization does not convert compiled patterns to an + abstract format like Java or .NET serialization. + + +THE MATCH DATA BLOCK + + pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize, + pcre2_general_context *gcontext); + + pcre2_match_data *pcre2_match_data_create_from_pattern( + const pcre2_code *code, pcre2_general_context *gcontext); + + void pcre2_match_data_free(pcre2_match_data *match_data); + + Information about a successful or unsuccessful match is placed in a + match data block, which is an opaque structure that is accessed by + function calls. In particular, the match data block contains a vector + of offsets into the subject string that define the matched parts of the + subject. This is known as the ovector. + + Before calling pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match() + you must create a match data block by calling one of the creation func- + tions above. For pcre2_match_data_create(), the first argument is the + number of pairs of offsets in the ovector. + + When using pcre2_match(), one pair of offsets is required to identify + the string that matched the whole pattern, with an additional pair for + each captured substring. For example, a value of 4 creates enough space + to record the matched portion of the subject plus three captured sub- + strings. + + When using pcre2_dfa_match() there may be multiple matched substrings + of different lengths at the same point in the subject. The ovector + should be made large enough to hold as many as are expected. 
+ + A minimum of at least 1 pair is imposed by pcre2_match_data_create(), + so it is always possible to return the overall matched string in the + case of pcre2_match() or the longest match in the case of + pcre2_dfa_match(). The maximum number of pairs is 65535; if the first + argument of pcre2_match_data_create() is greater than this, 65535 is + used. + + The second argument of pcre2_match_data_create() is a pointer to a gen- + eral context, which can specify custom memory management for obtaining + the memory for the match data block. If you are not using custom memory + management, pass NULL, which causes malloc() to be used. + + For pcre2_match_data_create_from_pattern(), the first argument is a + pointer to a compiled pattern. The ovector is created to be exactly the + right size to hold all the substrings a pattern might capture when + matched using pcre2_match(). You should not use this call when matching + with pcre2_dfa_match(). The second argument is again a pointer to a + general context, but in this case if NULL is passed, the memory is ob- + tained using the same allocator that was used for the compiled pattern + (custom or default). + + A match data block can be used many times, with the same or different + compiled patterns. You can extract information from a match data block + after a match operation has finished, using functions that are de- + scribed in the sections on matched strings and other match data below. + + When a call of pcre2_match() fails, valid data is available in the + match block only when the error is PCRE2_ERROR_NOMATCH, PCRE2_ER- + ROR_PARTIAL, or one of the error codes for an invalid UTF string. Ex- + actly what is available depends on the error, and is detailed below. + + When one of the matching functions is called, pointers to the compiled + pattern and the subject string are set in the match data block so that + they can be referenced by the extraction functions after a successful + match. 
After running a match, you must not free a compiled pattern or a + subject string until after all operations on the match data block (for + that match) have taken place, unless, in the case of the subject + string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is + described in the section entitled "Option bits for pcre2_match()" be- + low. + + When a match data block itself is no longer needed, it should be freed + by calling pcre2_match_data_free(). If this function is called with a + NULL argument, it returns immediately, without doing anything. + + +MEMORY USE FOR MATCH DATA BLOCKS + + PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_match_data_heapframes_size( + pcre2_match_data *match_data); + + The size of a match data block depends on the size of the ovector that + it contains. The function pcre2_get_match_data_size() returns the size, + in bytes, of the block that is its argument. + + When pcre2_match() runs interpretively (that is, without using JIT), it + makes use of a vector of data frames for remembering backtracking posi- + tions. The size of each individual frame depends on the number of cap- + turing parentheses in the pattern and can be obtained by calling + pcre2_pattern_info() with the PCRE2_INFO_FRAMESIZE option (see the sec- + tion entitled "Information about a compiled pattern" above). + + Heap memory is used for the frames vector; if the initial memory block + turns out to be too small during matching, it is automatically ex- + panded. When pcre2_match() returns, the memory is not freed, but re- + mains attached to the match data block, for use by any subsequent + matches that use the same block. It is automatically freed when the + match data block itself is freed. + + You can find the current size of the frames vector that a match data + block owns by calling pcre2_get_match_data_heapframes_size(). For a + newly created match data block the size will be zero. 
Some types of + match may require a lot of frames and thus a large vector; applications + that run in environments where memory is constrained can check this and + free the match data block if the heap frames vector has become too big. + + +MATCHING A PATTERN: THE TRADITIONAL FUNCTION + + int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext); + + The function pcre2_match() is called to match a subject string against + a compiled pattern, which is passed in the code argument. You can call + pcre2_match() with the same code argument as many times as you like, in + order to find multiple matches in the subject string or to match dif- + ferent subject strings with the same pattern. + + This function is the main matching facility of the library, and it op- + erates in a Perl-like manner. For specialist use there is also an al- + ternative matching function, which is described below in the section + about the pcre2_dfa_match() function. + + Here is an example of a simple call to pcre2_match(): + + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL); /* a match context; NULL means use defaults */ + + If the subject string is zero-terminated, the length can be given as + PCRE2_ZERO_TERMINATED. A match context must be provided if certain less + common matching parameters are to be changed. For details, see the sec- + tion on the match context above. + + The string to be matched by pcre2_match() + + The subject string is passed to pcre2_match() as a pointer in subject, + a length in length, and a starting offset in startoffset. 
The length + and offset are in code units, not characters. That is, they are in + bytes for the 8-bit library, 16-bit code units for the 16-bit library, + and 32-bit code units for the 32-bit library, whether or not UTF pro- + cessing is enabled. As a special case, if subject is NULL and length is + zero, the subject is assumed to be an empty string. If length is non- + zero, an error occurs if subject is NULL. + + If startoffset is greater than the length of the subject, pcre2_match() + returns PCRE2_ERROR_BADOFFSET. When the starting offset is zero, the + search for a match starts at the beginning of the subject, and this is + by far the most common case. In UTF-8 or UTF-16 mode, the starting off- + set must point to the start of a character, or to the end of the sub- + ject (in UTF-32 mode, one code unit equals one character, so all off- + sets are valid). Like the pattern string, the subject may contain bi- + nary zeros. + + A non-zero starting offset is useful when searching for another match + in the same subject by calling pcre2_match() again after a previous + success. Setting startoffset differs from passing over a shortened + string and setting PCRE2_NOTBOL in the case of a pattern that begins + with any kind of lookbehind. For example, consider the pattern + + \Biss\B + + which finds occurrences of "iss" in the middle of words. (\B matches + only if the current position in the subject is not a word boundary.) + When applied to the string "Mississippi" the first call to + pcre2_match() finds the first occurrence. If pcre2_match() is called + again with just the remainder of the subject, namely "issippi", it does + not match, because \B is always false at the start of the subject, + which is deemed to be a word boundary. However, if pcre2_match() is + passed the entire string again, but with startoffset set to 4, it finds + the second occurrence of "iss" because it is able to look behind the + starting point to discover that it is preceded by a letter. 
+ + Finding all the matches in a subject is tricky when the pattern can + match an empty string. It is possible to emulate Perl's /g behaviour by + first trying the match again at the same offset, with the + PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED options, and then if that + fails, advancing the starting offset and trying an ordinary match + again. There is some code that demonstrates how to do this in the + pcre2demo sample program. In the most general case, you have to check + to see if the newline convention recognizes CRLF as a newline, and if + so, and the current character is CR followed by LF, advance the start- + ing offset by two characters instead of one. + + If a non-zero starting offset is passed when the pattern is anchored, a + single attempt to match at the given offset is made. This can only suc- + ceed if the pattern does not require the match to be at the start of + the subject. In other words, the anchoring must be the result of set- + ting the PCRE2_ANCHORED option or the use of .* with PCRE2_DOTALL, not + by starting the pattern with ^ or \A. + + Option bits for pcre2_match() + + The unused bits of the options argument for pcre2_match() must be zero. + The only bits that may be set are PCRE2_ANCHORED, + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_EN- + DANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, + PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PAR- + TIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below. + + Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not sup- + ported by the just-in-time (JIT) compiler. If it is set, JIT matching + is disabled and the interpretive code in pcre2_match() is run. + PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from + PCRE2_NO_JIT (obviously), the remaining options are supported for JIT + matching. + + PCRE2_ANCHORED + + The PCRE2_ANCHORED option limits pcre2_match() to matching at the first + matching position. 
If a pattern was compiled with PCRE2_ANCHORED, or + turned out to be anchored by virtue of its contents, it cannot be made + unanchored at matching time. Note that setting the option at match time + disables JIT matching. + + PCRE2_COPY_MATCHED_SUBJECT + + By default, a pointer to the subject is remembered in the match data + block so that, after a successful match, it can be referenced by the + substring extraction functions. This means that the subject's memory + must not be freed until all such operations are complete. For some ap- + plications where the lifetime of the subject string is not guaranteed, + it may be necessary to make a copy of the subject string, but it is + wasteful to do this unless the match is successful. After a successful + match, if PCRE2_COPY_MATCHED_SUBJECT is set, the subject is copied and + the new pointer is remembered in the match data block instead of the + original subject pointer. The memory allocator that was used for the + match block itself is used. The copy is automatically freed when + pcre2_match_data_free() is called to free the match data block. It is + also automatically freed if the match data block is re-used for another + match operation. + + PCRE2_DISABLE_RECURSELOOP_CHECK + + This option is relevant only to pcre2_match() for interpretive match- + ing. It is ignored when JIT is used, and is forbidden for + pcre2_dfa_match(). + + The use of recursion in patterns can lead to infinite loops. In the in- + terpretive matcher these would be eventually caught by the match or + heap limits, but this could take a long time and/or use a lot of memory + if the limits are large. There is therefore a check at the start of + each recursion. If the same group is still active from a previous + call, and the current subject pointer is the same as it was at the + start of that group, and the furthest inspected character of the sub- + ject has not changed, an error is generated.
+ + There are rare cases of matches that would complete, but nevertheless + trigger this error. This option disables the check. It is provided + mainly for testing when comparing JIT and interpretive behaviour. + + PCRE2_ENDANCHORED + + If the PCRE2_ENDANCHORED option is set, any string that pcre2_match() + matches must be right at the end of the subject string. Note that set- + ting the option at match time disables JIT matching. + + PCRE2_NOTBOL + + This option specifies that the first character of the subject string is not + the beginning of a line, so the circumflex metacharacter should not + match before it. Setting this without having set PCRE2_MULTILINE at + compile time causes circumflex never to match. This option affects only + the behaviour of the circumflex metacharacter. It does not affect \A. + + PCRE2_NOTEOL + + This option specifies that the end of the subject string is not the end + of a line, so the dollar metacharacter should not match it nor (except + in multiline mode) a newline immediately before it. Setting this with- + out having set PCRE2_MULTILINE at compile time causes dollar never to + match. This option affects only the behaviour of the dollar metacharac- + ter. It does not affect \Z or \z. + + PCRE2_NOTEMPTY + + An empty string is not considered to be a valid match if this option is + set. If there are alternatives in the pattern, they are tried. If all + the alternatives match the empty string, the entire match fails. For + example, if the pattern + + a?b? + + is applied to a string not beginning with "a" or "b", it matches an + empty string at the start of the subject. With PCRE2_NOTEMPTY set, this + match is not valid, so pcre2_match() searches further into the string + for occurrences of "a" or "b". + + PCRE2_NOTEMPTY_ATSTART + + This is like PCRE2_NOTEMPTY, except that it locks out an empty string + match only at the first matching position, that is, at the start of the + subject plus the starting offset.
An empty string match later in the + subject is permitted. If the pattern is anchored, such a match can oc- + cur only if the pattern contains \K. + + PCRE2_NO_JIT + + By default, if a pattern has been successfully processed by + pcre2_jit_compile(), JIT is automatically used when pcre2_match() is + called with options that JIT supports. Setting PCRE2_NO_JIT disables + the use of JIT; it forces matching to be done by the interpreter. + + PCRE2_NO_UTF_CHECK + + When PCRE2_UTF is set at compile time, the validity of the subject as a + UTF string is checked unless PCRE2_NO_UTF_CHECK is passed to + pcre2_match() or PCRE2_MATCH_INVALID_UTF was passed to pcre2_compile(). + The latter special case is discussed in detail in the pcre2unicode doc- + umentation. + + In the default case, if a non-zero starting offset is given, the check + is applied only to that part of the subject that could be inspected + during matching, and there is a check that the starting offset points + to the first code unit of a character or to the end of the subject. If + there are no lookbehind assertions in the pattern, the check starts at + the starting offset. Otherwise, it starts at the length of the longest + lookbehind before the starting offset, or at the start of the subject + if there are not that many characters before the starting offset. Note + that the sequences \b and \B are one-character lookbehinds. + + The check is carried out before any other processing takes place, and a + negative error code is returned if the check fails. There are several + UTF error codes for each code unit width, corresponding to different + problems with the code unit sequence. There are discussions about the + validity of UTF-8 strings, UTF-16 strings, and UTF-32 strings in the + pcre2unicode documentation. + + If you know that your subject is valid, and you want to skip this check + for performance reasons, you can set the PCRE2_NO_UTF_CHECK option when + calling pcre2_match(). 
You might want to do this for the second and + subsequent calls to pcre2_match() if you are making repeated calls to + find multiple matches in the same subject string. + + Warning: Unless PCRE2_MATCH_INVALID_UTF was set at compile time, when + PCRE2_NO_UTF_CHECK is set at match time the effect of passing an in- + valid string as a subject, or an invalid value of startoffset, is unde- + fined. Your program may crash or loop indefinitely or give wrong re- + sults. + + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT + + These options turn on the partial matching feature. A partial match oc- + curs if the end of the subject string is reached successfully, but + there are not enough subject characters to complete the match. In addi- + tion, either at least one character must have been inspected or the + pattern must contain a lookbehind, or the pattern must be one that + could match an empty string. + + If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PAR- + TIAL_HARD) is set, matching continues by testing any remaining alterna- + tives. Only if no complete match can be found is PCRE2_ERROR_PARTIAL + returned instead of PCRE2_ERROR_NOMATCH. In other words, PCRE2_PAR- + TIAL_SOFT specifies that the caller is prepared to handle a partial + match, but only if no complete match can be found. + + If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this + case, if a partial match is found, pcre2_match() immediately returns + PCRE2_ERROR_PARTIAL, without considering any other alternatives. In + other words, when PCRE2_PARTIAL_HARD is set, a partial match is consid- + ered to be more important than an alternative complete match. + + There is a more detailed discussion of partial and multi-segment match- + ing, with examples, in the pcre2partial documentation. + + +NEWLINE HANDLING WHEN MATCHING + + When PCRE2 is built, a default newline convention is set; this is usu- + ally the standard convention for the operating system.
The default can + be overridden in a compile context by calling pcre2_set_newline(). It + can also be overridden by starting a pattern string with, for example, + (*CRLF), as described in the section on newline conventions in the + pcre2pattern page. During matching, the newline choice affects the be- + haviour of the dot, circumflex, and dollar metacharacters. It may also + alter the way the match starting position is advanced after a match + failure for an unanchored pattern. + + When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is + set as the newline convention, and a match attempt for an unanchored + pattern fails when the current starting position is at a CRLF sequence, + and the pattern contains no explicit matches for CR or LF characters, + the match position is advanced by two characters instead of one, in + other words, to after the CRLF. + + The above rule is a compromise that makes the most common cases work as + expected. For example, if the pattern is .+A (and the PCRE2_DOTALL op- + tion is not set), it does not match the string "\r\nA" because, after + failing at the start, it skips both the CR and the LF before retrying. + However, the pattern [\r\n]A does match that string, because it con- + tains an explicit CR or LF reference, and so advances only by one char- + acter after the first failure. + + An explicit match for CR or LF is either a literal appearance of one of + those characters in the pattern, or one of the \r or \n or equivalent + octal or hexadecimal escape sequences. Implicit matches such as [^X] do + not count, nor does \s, even though it includes CR and LF in the char- + acters that it matches. + + Notwithstanding the above, anomalous effects may still occur when CRLF + is a valid newline sequence and explicit \r or \n escapes appear in the + pattern. 
+ + +HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS + + uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data); + + PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data); + + In general, a pattern matches a certain portion of the subject, and in + addition, further substrings from the subject may be picked out by + parenthesized parts of the pattern. Following the usage in Jeffrey + Friedl's book, this is called "capturing" in what follows, and the + phrase "capture group" (Perl terminology) is used for a fragment of a + pattern that picks out a substring. PCRE2 supports several other kinds + of parenthesized group that do not cause substrings to be captured. The + pcre2_pattern_info() function can be used to find out how many capture + groups there are in a compiled pattern. + + You can use auxiliary functions for accessing captured substrings by + number or by name, as described in sections below. + + Alternatively, you can make direct use of the vector of PCRE2_SIZE val- + ues, called the ovector, which contains the offsets of captured + strings. It is part of the match data block. The function + pcre2_get_ovector_pointer() returns the address of the ovector, and + pcre2_get_ovector_count() returns the number of pairs of values it con- + tains. + + Within the ovector, the first in each pair of values is set to the off- + set of the first code unit of a substring, and the second is set to the + offset of the first code unit after the end of a substring. These val- + ues are always code unit offsets, not character offsets. That is, they + are byte offsets in the 8-bit library, 16-bit offsets in the 16-bit li- + brary, and 32-bit offsets in the 32-bit library. + + After a partial match (error return PCRE2_ERROR_PARTIAL), only the + first pair of offsets (that is, ovector[0] and ovector[1]) are set. + They identify the part of the subject that was partially matched. 
See + the pcre2partial documentation for details of partial matching. + + After a fully successful match, the first pair of offsets identifies + the portion of the subject string that was matched by the entire pat- + tern. The next pair is used for the first captured substring, and so + on. The value returned by pcre2_match() is one more than the highest + numbered pair that has been set. For example, if two substrings have + been captured, the returned value is 3. If there are no captured sub- + strings, the return value from a successful match is 1, indicating that + just the first pair of offsets has been set. + + If a pattern uses the \K escape sequence within a positive assertion, + the reported start of a successful match can be greater than the end of + the match. For example, if the pattern (?=ab\K) is matched against + "ab", the start and end offset values for the match are 2 and 0. + + If a capture group is matched repeatedly within a single match opera- + tion, it is the last portion of the subject that it matched that is re- + turned. + + If the ovector is too small to hold all the captured substring offsets, + as much as possible is filled in, and the function returns a value of + zero. If captured substrings are not of interest, pcre2_match() may be + called with a match data block whose ovector is of minimum length (that + is, one pair). + + It is possible for capture group number n+1 to match some part of the + subject when group n has not been used at all. For example, if the + string "abc" is matched against the pattern (a|(z))(bc) the return from + the function is 4, and groups 1 and 3 are matched, but 2 is not. When + this happens, both values in the offset pairs corresponding to unused + groups are set to PCRE2_UNSET. + + Offset values that correspond to unused groups at the end of the ex- + pression are also set to PCRE2_UNSET. For example, if the string "abc" + is matched against the pattern (abc)(x(yz)?)? groups 2 and 3 are not + matched. 
The return from the function is 2, because the highest used + capture group number is 1. The offsets for the second and third capture + groups (assuming the vector is large enough, of course) are set to + PCRE2_UNSET. + + Elements in the ovector that do not correspond to capturing parentheses + in the pattern are never changed. That is, if a pattern contains n cap- + turing parentheses, no more than ovector[0] to ovector[2n+1] are set by + pcre2_match(). The other elements retain whatever values they previ- + ously had. After a failed match attempt, the contents of the ovector + are unchanged. + + +OTHER INFORMATION ABOUT A MATCH + + PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data); + + PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data); + + As well as the offsets in the ovector, other information about a match + is retained in the match data block and can be retrieved by the above + functions in appropriate circumstances. If they are called at other + times, the result is undefined. + + After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a + failure to match (PCRE2_ERROR_NOMATCH), a mark name may be available. + The function pcre2_get_mark() can be called to access this name, which + can be specified in the pattern by any of the backtracking control + verbs, not just (*MARK). The same function applies to all the verbs. It + returns a pointer to the zero-terminated name, which is within the com- + piled pattern. If no name is available, NULL is returned. The length of + the name (excluding the terminating zero) is stored in the code unit + that precedes the name. You should use this length instead of relying + on the terminating zero if the name might contain a binary zero. + + After a successful match, the name that is returned is the last mark + name encountered on the matching path through the pattern. Instances of + backtracking verbs without names do not count. 
Thus, for example, if + the matching path contains (*MARK:A)(*PRUNE), the name "A" is returned. + After a "no match" or a partial match, the last encountered name is re- + turned. For example, consider this pattern: + + ^(*MARK:A)((*MARK:B)a|b)c + + When it matches "bc", the returned name is A. The B mark is "seen" in + the first branch of the group, but it is not on the matching path. On + the other hand, when this pattern fails to match "bx", the returned + name is B. + + Warning: By default, certain start-of-match optimizations are used to + give a fast "no match" result in some situations. For example, if the + anchoring is removed from the pattern above, there is an initial check + for the presence of "c" in the subject before running the matching en- + gine. This check fails for "bx", causing a match failure without seeing + any marks. You can disable the start-of-match optimizations by setting + the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or by starting + the pattern with (*NO_START_OPT). + + After a successful match, a partial match, or one of the invalid UTF + errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can + be called. After a successful or partial match it returns the code unit + offset of the character at which the match started. For a non-partial + match, this can be different to the value of ovector[0] if the pattern + contains the \K escape sequence. After a partial match, however, this + value is always the same as ovector[0] because \K does not affect the + result of a partial match. + + After a UTF check failure, pcre2_get_startchar() can be used to obtain + the code unit offset of the invalid UTF character. Details are given in + the pcre2unicode page. + + +ERROR RETURNS FROM pcre2_match() + + If pcre2_match() fails, it returns a negative number. This can be con- + verted to a text string by calling the pcre2_get_error_message() func- + tion (see "Obtaining a textual error message" below). 
Negative error + codes are also returned by other functions, and are documented with + them. The codes are given names in the header file. If UTF checking is + in force and an invalid UTF subject string is detected, one of a number + of UTF-specific negative error codes is returned. Details are given in + the pcre2unicode page. The following are the other errors that may be + returned by pcre2_match(): + + PCRE2_ERROR_NOMATCH + + The subject string did not match the pattern. + + PCRE2_ERROR_PARTIAL + + The subject string did not match, but it did match partially. See the + pcre2partial documentation for details of partial matching. + + PCRE2_ERROR_BADMAGIC + + PCRE2 stores a 4-byte "magic number" at the start of the compiled code, + to catch the case when it is passed a junk pointer. This is the error + that is returned when the magic number is not present. + + PCRE2_ERROR_BADMODE + + This error is given when a compiled pattern is passed to a function in + a library of a different code unit width, for example, a pattern com- + piled by the 8-bit library is passed to a 16-bit or 32-bit library + function. + + PCRE2_ERROR_BADOFFSET + + The value of startoffset was greater than the length of the subject. + + PCRE2_ERROR_BADOPTION + + An unrecognized bit was set in the options argument. + + PCRE2_ERROR_BADUTFOFFSET + + The UTF code unit sequence that was passed as a subject was checked and + found to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the + value of startoffset did not point to the beginning of a UTF character + or the end of the subject. + + PCRE2_ERROR_CALLOUT + + This error is never generated by pcre2_match() itself. It is provided + for use by callout functions that want to cause pcre2_match() or + pcre2_callout_enumerate() to return a distinctive error code. See the + pcre2callout documentation for details. + + PCRE2_ERROR_DEPTHLIMIT + + The nested backtracking depth limit was reached. 
+ + PCRE2_ERROR_HEAPLIMIT + + The heap limit was reached. + + PCRE2_ERROR_INTERNAL + + An unexpected internal error has occurred. This error could be caused + by a bug in PCRE2 or by overwriting of the compiled pattern. + + PCRE2_ERROR_JIT_STACKLIMIT + + This error is returned when a pattern that was successfully studied us- + ing JIT is being matched, but the memory available for the just-in-time + processing stack is not large enough. See the pcre2jit documentation + for more details. + + PCRE2_ERROR_MATCHLIMIT + + The backtracking match limit was reached. + + PCRE2_ERROR_NOMEMORY + + Heap memory is used to remember backtracking points. This error is + given when the memory allocation function (default or custom) fails. + Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given if the + amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is + also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory alloca- + tion fails. + + PCRE2_ERROR_NULL + + Either the code, subject, or match_data argument was passed as NULL. + + PCRE2_ERROR_RECURSELOOP + + This error is returned when pcre2_match() detects a recursion loop + within the pattern. Specifically, it means that either the whole pat- + tern or a capture group has been called recursively for the second time + at the same position in the subject string. Some simple patterns that + might do this are detected and faulted at compile time, but more com- + plicated cases, in particular mutual recursions between two different + groups, cannot be detected until matching is attempted. + + +OBTAINING A TEXTUAL ERROR MESSAGE + + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, + PCRE2_SIZE bufflen); + + A text message for an error code from any PCRE2 function (compile, + match, or auxiliary) can be obtained by calling pcre2_get_error_mes- + sage(). 
The code is passed as the first argument, with the remaining + two arguments specifying a code unit buffer and its length in code + units, into which the text message is placed. The message is returned + in code units of the appropriate width for the library that is being + used. + + The returned message is terminated with a trailing zero, and the func- + tion returns the number of code units used, excluding the trailing + zero. If the error number is unknown, the negative error code PCRE2_ER- + ROR_BADDATA is returned. If the buffer is too small, the message is + truncated (but still with a trailing zero), and the negative error code + PCRE2_ERROR_NOMEMORY is returned. None of the messages are very long; + a buffer size of 120 code units is ample. + + +EXTRACTING CAPTURED SUBSTRINGS BY NUMBER + + int pcre2_substring_length_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_SIZE *length); + + int pcre2_substring_copy_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR *buffer, + PCRE2_SIZE *bufflen); + + int pcre2_substring_get_bynumber(pcre2_match_data *match_data, + uint32_t number, PCRE2_UCHAR **bufferptr, + PCRE2_SIZE *bufflen); + + void pcre2_substring_free(PCRE2_UCHAR *buffer); + + Captured substrings can be accessed directly by using the ovector as + described above. For convenience, auxiliary functions are provided for + extracting captured substrings as new, separate, zero-terminated + strings. A substring that contains a binary zero is correctly extracted + and has a further zero added on the end, but the result is not, of + course, a C string. + + The functions in this section identify substrings by number. The number + zero refers to the entire matched substring, with higher numbers refer- + ring to substrings captured by parenthesized groups. After a partial + match, only substring zero is available. An attempt to extract any + other substring gives the error PCRE2_ERROR_PARTIAL. 
The next section + describes similar functions for extracting captured substrings by name. + + If a pattern uses the \K escape sequence within a positive assertion, + the reported start of a successful match can be greater than the end of + the match. For example, if the pattern (?=ab\K) is matched against + "ab", the start and end offset values for the match are 2 and 0. In + this situation, calling these functions with a zero substring number + extracts a zero-length empty string. + + You can find the length in code units of a captured substring without + extracting it by calling pcre2_substring_length_bynumber(). The first + argument is a pointer to the match data block, the second is the group + number, and the third is a pointer to a variable into which the length + is placed. If you just want to know whether or not the substring has + been captured, you can pass the third argument as NULL. + + The pcre2_substring_copy_bynumber() function copies a captured sub- + string into a supplied buffer, whereas pcre2_substring_get_bynumber() + copies it into new memory, obtained using the same memory allocation + function that was used for the match data block. The first two argu- + ments of these functions are a pointer to the match data block and a + capture group number. + + The final arguments of pcre2_substring_copy_bynumber() are a pointer to + the buffer and a pointer to a variable that contains its length in code + units. This is updated to contain the actual number of code units used + for the extracted substring, excluding the terminating zero. + + For pcre2_substring_get_bynumber() the third and fourth arguments point + to variables that are updated with a pointer to the new memory and the + number of code units that comprise the substring, again excluding the + terminating zero. When the substring is no longer needed, the memory + should be freed by calling pcre2_substring_free(). 
+ + The return value from all these functions is zero for success, or a + negative error code. If the pattern match failed, the match failure + code is returned. If a substring number greater than zero is used af- + ter a partial match, PCRE2_ERROR_PARTIAL is returned. Other possible + error codes are: + + PCRE2_ERROR_NOMEMORY + + The buffer was too small for pcre2_substring_copy_bynumber(), or the + attempt to get memory failed for pcre2_substring_get_bynumber(). + + PCRE2_ERROR_NOSUBSTRING + + There is no substring with that number in the pattern, that is, the + number is greater than the number of capturing parentheses. + + PCRE2_ERROR_UNAVAILABLE + + The substring number, though not greater than the number of captures in + the pattern, is greater than the number of slots in the ovector, so the + substring could not be captured. + + PCRE2_ERROR_UNSET + + The substring did not participate in the match. For example, if the + pattern is (abc)|(def) and the subject is "def", and the ovector con- + tains at least two capturing slots, substring number 1 is unset. + + +EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS + + int pcre2_substring_list_get(pcre2_match_data *match_data, + PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr); + + void pcre2_substring_list_free(PCRE2_UCHAR **list); + + The pcre2_substring_list_get() function extracts all available sub- + strings and builds a list of pointers to them. It also (optionally) + builds a second list that contains their lengths (in code units), ex- + cluding a terminating zero that is added to each of them. All this is + done in a single block of memory that is obtained using the same memory + allocation function that was used to get the match data block. + + This function must be called only after a successful match. If called + after a partial match, the error code PCRE2_ERROR_PARTIAL is returned. + + The address of the memory block is returned via listptr, which is also + the start of the list of string pointers. 
The end of the list is marked + by a NULL pointer. The address of the list of lengths is returned via + lengthsptr. If your strings do not contain binary zeros and you do not + therefore need the lengths, you may supply NULL as the lengthsptr argu- + ment to disable the creation of a list of lengths. The yield of the + function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the mem- + ory block could not be obtained. When the list is no longer needed, it + should be freed by calling pcre2_substring_list_free(). + + If this function encounters a substring that is unset, which can happen + when capture group number n+1 matches some part of the subject, but + group n has not been used at all, it returns an empty string. This can + be distinguished from a genuine zero-length substring by inspecting the + appropriate offset in the ovector, which contains PCRE2_UNSET for unset + substrings, or by calling pcre2_substring_length_bynumber(). + + +EXTRACTING CAPTURED SUBSTRINGS BY NAME + + int pcre2_substring_number_from_name(const pcre2_code *code, + PCRE2_SPTR name); + + int pcre2_substring_length_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_SIZE *length); + + int pcre2_substring_copy_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen); + + int pcre2_substring_get_byname(pcre2_match_data *match_data, + PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen); + + void pcre2_substring_free(PCRE2_UCHAR *buffer); + + To extract a substring by name, you first have to find the associated num- + ber. For example, for this pattern: + + (a+)b(?<xxx>\d+)... + + the number of the capture group called "xxx" is 2. If the name is known + to be unique (PCRE2_DUPNAMES was not set), you can find the number from + the name by calling pcre2_substring_number_from_name(). The first argu- + ment is the compiled pattern, and the second is the name. 
The yield of + the function is the group number, PCRE2_ERROR_NOSUBSTRING if there is + no group with that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is + more than one group with that name. Given the number, you can extract + the substring directly from the ovector, or use one of the "bynumber" + functions described above. + + For convenience, there are also "byname" functions that correspond to + the "bynumber" functions, the only difference being that the second ar- + gument is a name instead of a number. If PCRE2_DUPNAMES is set and + there are duplicate names, these functions scan all the groups with the + given name, and return the captured substring from the first named + group that is set. + + If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is + returned. If all groups with the name have numbers that are greater + than the number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is re- + turned. If there is at least one group with a slot in the ovector, but + no group is found to be set, PCRE2_ERROR_UNSET is returned. + + Warning: If the pattern uses the (?| feature to set up multiple capture + groups with the same number, as described in the section on duplicate + group numbers in the pcre2pattern page, you cannot use names to distin- + guish the different capture groups, because names are not included in + the compiled code. The matching process uses only numbers. For this + reason, the use of different names for groups with the same number + causes an error at compile time. 
+ + +CREATING A NEW STRING WITH SUBSTITUTIONS + + int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, PCRE2_SPTR replacement, + PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer, + PCRE2_SIZE *outlengthptr); + + This function optionally calls pcre2_match() and then makes a copy of + the subject string in outputbuffer, replacing parts that were matched + with the replacement string, whose length is supplied in rlength, which + can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As + a special case, if replacement is NULL and rlength is zero, the re- + placement is assumed to be an empty string. If rlength is non-zero, an + error occurs if replacement is NULL. + + There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to re- + turn just the replacement string(s). The default action is to perform + just one replacement if the pattern matches, but there is an option + that requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL be- + low). + + If successful, pcre2_substitute() returns the number of substitutions + that were carried out. This may be zero if no match was found, and is + never greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A nega- + tive value is returned if an error is detected. + + Matches in which a \K item in a lookahead in the pattern causes the + match to end before it starts are not supported, and give rise to an + error return. For global replacements, matches in which \K in a lookbe- + hind causes the match to start earlier than the point that was reached + in the previous iteration are also not supported. 
+ + The first seven arguments of pcre2_substitute() are the same as for + pcre2_match(), except that the partial matching options are not permit- + ted, and match_data may be passed as NULL, in which case a match data + block is obtained and freed within this function, using memory manage- + ment functions from the match context, if provided, or else those that + were used to allocate memory for the compiled code. + + If match_data is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the + provided block is used for all calls to pcre2_match(), and its contents + afterwards are the result of the final call. For global changes, this + will always be a no-match error. The contents of the ovector within the + match data block may or may not have been changed. + + As well as the usual options for pcre2_match(), a number of additional + options can be set in the options argument of pcre2_substitute(). One + such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external + match_data block must be provided, and it must have already been used + for an external call to pcre2_match() with the same pattern and subject + arguments. The data in the match_data block (return code, offset vec- + tor) is then used for the first substitution instead of calling + pcre2_match() from within pcre2_substitute(). This allows an applica- + tion to check for a match before choosing to substitute, without having + to repeat the match. + + The contents of the externally supplied match data block are not + changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI- + TUTE_GLOBAL is also set, pcre2_match() is called after the first sub- + stitution to check for further matches, but this is done using an in- + ternally obtained match data block, thus always leaving the external + block unchanged. 
+ + The code argument is not used for matching before the first substitu- + tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, + even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in- + formation such as the UTF setting and the number of capturing parenthe- + ses in the pattern. + + The default action of pcre2_substitute() is to return a copy of the + subject string with matched substrings replaced. However, if PCRE2_SUB- + STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are + returned. In the global case, multiple replacements are concatenated in + the output buffer. Substitution callouts (see below) can be used to + separate them if necessary. + + The outlengthptr argument of pcre2_substitute() must point to a vari- + able that contains the length, in code units, of the output buffer. If + the function is successful, the value is updated to contain the length + in code units of the new string, excluding the trailing zero that is + automatically added. + + If the function is not successful, the value set via outlengthptr de- + pends on the type of error. For syntax errors in the replacement + string, the value is the offset in the replacement string where the er- + ror was detected. For other errors, the value is PCRE2_UNSET by de- + fault. This includes the case of the output buffer being too small, un- + less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. + + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output + buffer is too small. The default action is to return PCRE2_ERROR_NOMEM- + ORY immediately. If this option is set, however, pcre2_substitute() + continues to go through the motions of matching and substituting (with- + out, of course, writing anything) in order to compute the size of + buffer that is needed, which will include the extra space for the ter- + minating NUL. This value is passed back via the outlengthptr variable, + with the result of the function still being PCRE2_ERROR_NOMEMORY. 
+ + Passing a buffer size of zero is a permitted way of finding out how + much memory is needed for a given substitution. However, this does mean + that the entire operation is carried out twice. Depending on the appli- + cation, it may be more efficient to allocate a large buffer and free + the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- + FLOW_LENGTH. + + The replacement string, which is interpreted as a UTF string in UTF + mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An + invalid UTF replacement string causes an immediate return with the rel- + evant UTF error code. + + If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in- + terpreted in any way. By default, however, a dollar character is an es- + cape character that can specify the insertion of characters from cap- + ture groups and names from (*MARK) or other control verbs in the pat- + tern. Dollar is the only escape character (backslash is treated as lit- + eral). The following forms are recognized: + + $$ insert a dollar character + $n or ${n} insert the contents of group n + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string + $*MARK or ${*MARK} insert a control verb name + + Either a group number or a group name can be given for n, for example + $2 or $NAME. Curly brackets are required only if the following charac- + ter would be interpreted as part of the number or name. The number may + be zero to include the entire matched string. For example, if the pat- + tern a(b)c is matched with "=abc=" and the replacement string + "+$1$0$1+", the result is "=+babcb+=". + + The JavaScript form $<name>, where the angle brackets are part of the + syntax, is also recognized for group names, but not for group numbers + or *MARK. + + $*MARK inserts the name from the last encountered backtracking control + verb on the matching path that has a name. 
(*MARK) must always include + a name, but the other verbs need not. For example, in the case of + (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B) + the relevant name is "B". This facility can be used to perform simple + simultaneous substitutions, as this pcre2test example shows: + + /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange + + PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject + string, replacing every matching substring. If this option is not set, + only the first matching substring is replaced. The search for matches + takes place in the original subject string (that is, previous replace- + ments do not affect it). Iteration is implemented by advancing the + startoffset value for each search, which is always passed the entire + subject string. If an offset limit is set in the match context, search- + ing stops when that limit is reached. + + You can restrict the effect of a global substitution to a portion of + the subject string by setting either or both of startoffset and an off- + set limit. Here is a pcre2test example: + + /B/g,replace=!,use_offset_limit + ABC ABC ABC ABC\=offset=3,offset_limit=12 + 2: ABC A!C A!C ABC + + When continuing with global substitutions after matching a substring + with zero length, an attempt to find a non-empty match at the same off- + set is performed. If this is not successful, the offset is advanced by + one character except when CRLF is a valid newline sequence and the next + two characters are CR, LF. In this case, the offset is advanced by two + characters. + + PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that + do not appear in the pattern to be treated as unset groups. This option + should be used with care, because it means that a typo in a group name + or number no longer causes the PCRE2_ERROR_NOSUBSTRING error. 
+ + PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un- + known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated + as empty strings when inserted as described above. If this option is + not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN- + SET error. This option does not influence the extended substitution + syntax described below. + + PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the + replacement string. Without this option, only the dollar character is + special, and only the group insertion forms listed above are valid. + When PCRE2_SUBSTITUTE_EXTENDED is set, several things change: + + Firstly, backslash in a replacement string is interpreted as an escape + character. The usual forms such as \x{ddd} can be used to specify par- + ticular character codes, and backslash followed by any non-alphanumeric + character quotes that character. Extended quoting can be coded using + \Q...\E, exactly as in pattern strings. The escapes \b and \v are in- + terpreted as the characters backspace and vertical tab, respectively. + + The interpretation of backslash followed by one or more digits is the + same as in a pattern, which in Perl has some ambiguities. Details are + given in the pcre2pattern page. + + The Python form \g<n>, where the angle brackets are part of the syntax + and n is either a group name or number, is recognized as an alternative + way of inserting the contents of a group, for example \g<3>. + + There are also four escape sequences for forcing the case of inserted + letters. Case forcing applies to all inserted characters, including + those from capture groups and letters within \Q...\E quoted sequences. + The insertion mechanism has three states: no case forcing, force upper + case, and force lower case.
The escape sequences change the current + state: \U and \L change to upper or lower case forcing, respectively, + and \E (when not terminating a \Q quoted sequence) reverts to no case + forcing. The sequences \u and \l force the next character (if it is a + letter) to upper or lower case, respectively, and then the state auto- + matically reverts to no case forcing. + + However, if \u is immediately followed by \L or \l is immediately fol- + lowed by \U, the next character's case is forced by the first escape + sequence, and subsequent characters by the second. This provides a "ti- + tle casing" facility that can be applied to group captures. For exam- + ple, if group 1 has captured "heLLo", the replacement string "\u\L$1" + becomes "Hello". + + If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, + Unicode properties are used for case forcing characters whose code + points are greater than 127. However, only simple case folding, as de- + termined by the Unicode file CaseFolding.txt is supported. PCRE2 does + not support language-specific special casing rules such as using dif- + ferent lower case Greek sigmas in the middle and ends of words (as de- + fined in the Unicode file SpecialCasing.txt). + + Note that case forcing sequences such as \U...\E do not nest. For exam- + ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final + \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX- + TRA_ALT_BSUX options do not apply to replacement strings. + + The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more + flexibility to capture group substitution. The syntax is similar to + that used by Bash: + + ${n:-string} + ${n:+string1:string2} + + As in the simple case, n may be a group number or a name. The first + form specifies a default value. If group n is set, its value is in- + serted; if not, the string is expanded and the result inserted. 
The + second form specifies strings that are expanded and inserted when group + n is set or unset, respectively. The first form is just a convenient + shorthand for + + ${n:+${n}:string} + + Backslash can be used to escape colons and closing curly brackets in + the replacement strings. A change of the case forcing state within a + replacement string remains in force afterwards, as shown in this + pcre2test example: + + /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo + body + 1: hello + somebody + 1: HELLO + + The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended + substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un- + known groups in the extended syntax forms to be treated as unset. + + If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, + PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrele- + vant and are ignored. + + Substitution errors + + In the event of an error, pcre2_substitute() returns a negative error + code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors + from pcre2_match() are passed straight back. + + PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser- + tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. + + PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ- + ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) + when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN- + SET_EMPTY is not set. + + PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big + enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size + of buffer that is needed is returned via outlengthptr. Note that this + does not happen by default. + + PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the + match_data argument is NULL or if the subject or replacement arguments + are NULL. 
For backward compatibility reasons an exception is made for + the replacement argument if the rlength argument is also 0. + + PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in + the replacement string, with more particular errors being PCRE2_ER- + ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE + (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax + error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN + (the pattern match ended before it started or the match started earlier + than the current position in the subject, which can happen if \K is + used in an assertion). + + As for all PCRE2 errors, a text message that describes the error can be + obtained by calling the pcre2_get_error_message() function (see "Ob- + taining a textual error message" above). + + Substitution callouts + + int pcre2_set_substitute_callout(pcre2_match_context *mcontext, + int (*callout_function)(pcre2_substitute_callout_block *, void *), + void *callout_data); + + The pcre2_set_substitute_callout() function can be used to specify a + callout function for pcre2_substitute(). This information is passed in + a match context. The callout function is called after each substitution + has been processed, but it can cause the replacement not to happen. + + The callout function is not called for simulated substitutions that + happen as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In + this mode, when substitution processing exceeds the buffer space pro- + vided by the caller, processing continues by counting code units. The + simulation is unable to populate the callout block, and so the simula- + tion is pessimistic about the required buffer size. Whichever is larger + of accepted or rejected substitution is reported as the required size. + Therefore, the returned buffer length may be an overestimate (without a + substitution callout, it is normally an exact measurement).
+ + The first argument of the callout function is a pointer to a substitute + callout block structure, which contains the following fields, not nec- + essarily in this order: + + uint32_t version; + uint32_t subscount; + PCRE2_SPTR input; + PCRE2_SPTR output; + PCRE2_SIZE *ovector; + uint32_t oveccount; + PCRE2_SIZE output_offsets[2]; + + The version field contains the version number of the block format. The + current version is 0. The version number will increase in future if + more fields are added, but the intention is never to remove any of the + existing fields. + + The subscount field is the number of the current match. It is 1 for the + first callout, 2 for the second, and so on. The input and output point- + ers are copies of the values passed to pcre2_substitute(). + + The ovector field points to the ovector, which contains the result of + the most recent match. The oveccount field contains the number of pairs + that are set in the ovector, and is always greater than zero. + + The output_offsets vector contains the offsets of the replacement in + the output string. This has already been processed for dollar and (if + requested) backslash substitutions as described above. + + The second argument of the callout function is the value passed as + callout_data when the function was registered. The value returned by + the callout function is interpreted as follows: + + If the value is zero, the replacement is accepted, and, if PCRE2_SUB- + STITUTE_GLOBAL is set, processing continues with a search for the next + match. If the value is not zero, the current replacement is not ac- + cepted. If the value is greater than zero, processing continues when + PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero + or PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied + to the output and the call to pcre2_substitute() exits, returning the + number of matches so far. 
+ + Substitution case callouts + + int pcre2_set_substitute_case_callout(pcre2_match_context *mcontext, + PCRE2_SIZE (*callout_function)(PCRE2_SPTR, PCRE2_SIZE, + PCRE2_UCHAR *, PCRE2_SIZE, + int, void *), + void *callout_data); + + The pcre2_set_substitute_case_callout() function can be used to spec- + ify a callout function for pcre2_substitute() to use when performing + case transformations. This does not affect any case insensitivity be- + haviour when performing a match, but only the user-visible transforma- + tions performed when processing a substitution such as: + + pcre2_substitute(..., "\\U$1", ...) + + The default case transformations applied by PCRE2 are reasonably com- + plete, and, in UTF or UCP mode, perform the simple locale-invariant + case transformations as specified by Unicode. This is suitable for the + internal (invisible) case-equivalence procedures used during pattern + matching, but an application may wish to use more sophisticated locale- + aware processing for the user-visible substitution transformations. + + One example implementation of the callout_function using the ICU li- + brary would be: + + PCRE2_SIZE + icu_case_callout( + PCRE2_SPTR input, PCRE2_SIZE input_len, + PCRE2_UCHAR *output, PCRE2_SIZE output_cap, + int to_case, void *data_ptr) + { + UErrorCode err = U_ZERO_ERROR; + int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER + ? u_strToLower(output, output_cap, input, input_len, NULL, &err) + : to_case == PCRE2_SUBSTITUTE_CASE_UPPER + ? u_strToUpper(output, output_cap, input, input_len, NULL, &err) + : u_strToTitle(output, output_cap, input, input_len, &first_char_only, + NULL, &err); + if (U_FAILURE(err)) return (~(PCRE2_SIZE)0); + return r; + } + + The first and second arguments of the case callout function are the + Unicode string to transform. + + The third and fourth arguments are the output buffer and its capacity.
+ + The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, + PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. + PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed + to the callout to indicate that the case of the entire callout input + should be case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed + to indicate that only the first character or glyph should be trans- + formed to Unicode titlecase and the rest to Unicode lowercase (note + that titlecasing sometimes uses Unicode properties to titlecase each + word in a string; but PCRE2 is requesting that only the single leading + character is to be titlecased). + + The sixth argument is the callout_data supplied to pcre2_set_substi- + tute_case_callout(). + + The resulting string in the destination buffer may be larger or smaller + than the input, if the casing rules merge or split characters. The re- + turn value is the length required for the output string. If a buffer of + sufficient size was provided to the callout, then the result must be + written to the buffer and the number of code units returned. If the re- + sult does not fit in the provided buffer, then the required capacity + must be returned and PCRE2 will not make use of the output buffer. + PCRE2 provides input and output buffers which overlap, so the callout + must support this by suitable internal buffering. + + Alternatively, if the callout wishes to indicate an error, then it may + return (~(PCRE2_SIZE)0). In this case pcre2_substitute() will immedi- + ately fail with error PCRE2_ERROR_REPLACECASE. + + When a case callout is combined with the PCRE2_SUBSTITUTE_OVER- + FLOW_LENGTH option, there are situations when pcre2_substitute() will + return an underestimate of the required buffer size. 
If you call + pcre2_substitute() once with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the + input buffer is too small for the replacement string to be constructed, + then instead of calling the case callout, pcre2_substitute() will make + an estimate of the required buffer size. The second call should also + pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that second call is not + guaranteed to succeed either, if the case callout requires more buffer + space than expected. The caller must make repeated attempts in a loop. + + +DUPLICATE CAPTURE GROUP NAMES + + int pcre2_substring_nametable_scan(const pcre2_code *code, + PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); + + When a pattern is compiled with the PCRE2_DUPNAMES option, names for + capture groups are not required to be unique. Duplicate names are al- + ways allowed for groups with the same number, created by using the (?| + feature. Indeed, if such groups are named, they are required to use the + same names. + + Normally, patterns that use duplicate names are such that in any one + match, only one of each set of identically-named groups participates. + An example is shown in the pcre2pattern documentation. + + When duplicates are present, pcre2_substring_copy_byname() and + pcre2_substring_get_byname() return the first substring corresponding + to the given name that is set. Only if none are set is PCRE2_ERROR_UN- + SET returned. The pcre2_substring_number_from_name() function re- + turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate + names. + + If you want to get full details of all captured substrings for a given + name, you must use the pcre2_substring_nametable_scan() function. The + first argument is the compiled pattern, and the second is the name. If + the third and fourth arguments are NULL, the function returns a group + number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise.
+ + When the third and fourth arguments are not NULL, they must be pointers + to variables that are updated by the function. After it has run, they + point to the first and last entries in the name-to-number table for the + given name, and the function returns the length of each entry in code + units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are + no entries for the given name. + + The format of the name table is described above in the section entitled + Information about a pattern. Given all the relevant entries for the + name, you can extract each of their numbers, and hence the captured + data. + + +FINDING ALL POSSIBLE MATCHES AT ONE POSITION + + The traditional matching function uses a similar algorithm to Perl, + which stops when it finds the first match at a given point in the sub- + ject. If you want to find all possible matches, or the longest possible + match at a given position, consider using the alternative matching + function (see below) instead. If you cannot use the alternative func- + tion, you can kludge it up by making use of the callout facility, which + is described in the pcre2callout documentation. + + What you have to do is to insert a callout right at the end of the pat- + tern. When your callout function is called, extract and save the cur- + rent matched substring. Then return 1, which forces pcre2_match() to + backtrack and try other alternatives. Ultimately, when it runs out of + matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH. 
+ + +MATCHING A PATTERN: THE ALTERNATIVE FUNCTION + + int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, + PCRE2_SIZE length, PCRE2_SIZE startoffset, + uint32_t options, pcre2_match_data *match_data, + pcre2_match_context *mcontext, + int *workspace, PCRE2_SIZE wscount); + + The function pcre2_dfa_match() is called to match a subject string + against a compiled pattern, using a matching algorithm that scans the + subject string just once (not counting lookaround assertions), and does + not backtrack (except when processing lookaround assertions). This has + different characteristics to the normal algorithm, and is not compati- + ble with Perl. Some of the features of PCRE2 patterns are not sup- + ported. Nevertheless, there are times when this kind of matching can be + useful. For a discussion of the two matching algorithms, and a list of + features that pcre2_dfa_match() does not support, see the pcre2matching + documentation. + + The arguments for the pcre2_dfa_match() function are the same as for + pcre2_match(), plus two extras. The ovector within the match data block + is used in a different way, and this is described below. The other com- + mon arguments are used in the same way as for pcre2_match(), so their + description is not repeated here. + + The two additional arguments provide workspace for the function. The + workspace vector should contain at least 20 elements. It is used for + keeping track of multiple paths through the pattern tree. More work- + space is needed for patterns and subjects where there are a lot of po- + tential matches. 
+ + Here is an example of a simple call to pcre2_dfa_match(): + + int wspace[20]; + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_dfa_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL, /* a match context; NULL means use defaults */ + wspace, /* working space vector */ + 20); /* number of elements (NOT size in bytes) */ + + Option bits for pcre2_dfa_match() + + The unused bits of the options argument for pcre2_dfa_match() must be + zero. The only bits that may be set are PCRE2_ANCHORED, + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO- + TEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, + PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and + PCRE2_DFA_RESTART. All but the last four of these are exactly the same + as for pcre2_match(), so their description is not repeated here. + + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT + + These have the same general effect as they do for pcre2_match(), but + the details are slightly different. When PCRE2_PARTIAL_HARD is set for + pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the + subject is reached and there is still at least one matching possibility + that requires additional characters. This happens even if some complete + matches have already been found. When PCRE2_PARTIAL_SOFT is set, the + return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL + if the end of the subject is reached, there have been no complete + matches, but there is still at least one matching possibility. The por- + tion of the string that was inspected when the longest partial match + was found is set as the first matching string in both cases. 
There is a + more detailed discussion of partial and multi-segment matching, with + examples, in the pcre2partial documentation. + + PCRE2_DFA_SHORTEST + + Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to + stop as soon as it has found one match. Because of the way the alterna- + tive algorithm works, this is necessarily the shortest possible match + at the first possible matching point in the subject string. + + PCRE2_DFA_RESTART + + When pcre2_dfa_match() returns a partial match, it is possible to call + it again, with additional subject characters, and have it continue with + the same match. The PCRE2_DFA_RESTART option requests this action; when + it is set, the workspace and wscount options must reference the same + vector as before because data about the match so far is left in them + after a partial match. There is more discussion of this facility in the + pcre2partial documentation. + + Successful returns from pcre2_dfa_match() + + When pcre2_dfa_match() succeeds, it may have matched more than one sub- + string in the subject. Note, however, that all the matches from one run + of the function start at the same point in the subject. The shorter + matches are all initial substrings of the longer matches. For example, + if the pattern + + <.*> + + is matched against the string + + This is <something> <something else> <something further> no more + + the three matched strings are + + <something> <something else> <something further> + <something> <something else> + <something> + + On success, the yield of the function is a number greater than zero, + which is the number of matched substrings. The offsets of the sub- + strings are returned in the ovector, and can be extracted by number in + the same way as for pcre2_match(), but the numbers bear no relation to + any capture groups that may exist in the pattern, because DFA matching + does not support capturing. + + Calls to the convenience functions that extract substrings by name re- + turn the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used af- + ter a DFA match.
The convenience functions that extract substrings by + number never return PCRE2_ERROR_NOSUBSTRING. + + The matched strings are stored in the ovector in reverse order of + length; that is, the longest matching string is first. If there were + too many matches to fit into the ovector, the yield of the function is + zero, and the vector is filled with the longest matches. + + NOTE: PCRE2's "auto-possessification" optimization usually applies to + character repeats at the end of a pattern (as well as internally). For + example, the pattern "a\d+" is compiled as if it were "a\d++". For DFA + matching, this means that only one possible match is found. If you re- + ally do want multiple matches in such cases, either use an ungreedy re- + peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com- + piling. + + Error returns from pcre2_dfa_match() + + The pcre2_dfa_match() function returns a negative number when it fails. + Many of the errors are the same as for pcre2_match(), as described + above. There are in addition the following errors that are specific to + pcre2_dfa_match(): + + PCRE2_ERROR_DFA_UITEM + + This return is given if pcre2_dfa_match() encounters an item in the + pattern that it does not support, for instance, the use of \C in a UTF + mode or a backreference. + + PCRE2_ERROR_DFA_UCOND + + This return is given if pcre2_dfa_match() encounters a condition item + that uses a backreference for the condition, or a test for recursion in + a specific capture group. These are not supported. + + PCRE2_ERROR_DFA_UINVALID_UTF + + This return is given if pcre2_dfa_match() is called for a pattern that + was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for + DFA matching. + + PCRE2_ERROR_DFA_WSSIZE + + This return is given if pcre2_dfa_match() runs out of space in the + workspace vector. 
+ + PCRE2_ERROR_DFA_RECURSE + + When a recursion or subroutine call is processed, the matching function + calls itself recursively, using private memory for the ovector and + workspace. This error is given if the internal ovector is not large + enough. This should be extremely rare, as a vector of size 1000 is + used. + + PCRE2_ERROR_DFA_BADRESTART + + When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option, + some plausibility checks are made on the contents of the workspace, + which should contain data about the previous partial match. If any of + these checks fail, this error is given. + + +SEE ALSO + + pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3), + pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 26 December 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 26 December 2024 PCRE2API(3) +------------------------------------------------------------------------------ + + +PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +BUILDING PCRE2 + + PCRE2 is distributed with a configure script that can be used to build + the library in Unix-like environments using the applications known as + Autotools. Also in the distribution are files to support building using + CMake instead of configure. The text file README contains general in- + formation about building with Autotools (some of which is repeated be- + low), and also has some comments about building on various operating + systems. The files in the vms directory support building under OpenVMS. + There is a lot more information about building PCRE2 without using Au- + totools (including information about using CMake and building "by + hand") in the text file called NON-AUTOTOOLS-BUILD. 
You should consult + this file as well as the README file if you are building in a non-Unix- + like environment. + + +PCRE2 BUILD-TIME OPTIONS + + The rest of this document describes the optional features of PCRE2 that + can be selected when the library is compiled. It assumes use of the + configure script, where the optional features are selected or dese- + lected by providing options to configure before running the make com- + mand. However, the same options can be selected in both Unix-like and + non-Unix-like environments if you are using CMake instead of configure + to build PCRE2. + + If you are not using Autotools or CMake, option selection can be done + by editing the config.h file, or by passing parameter settings to the + compiler, as described in NON-AUTOTOOLS-BUILD. + + The complete list of options for configure (which includes the standard + ones such as the selection of the installation directory) can be ob- + tained by running + + ./configure --help + + The following sections include descriptions of "on/off" options whose + names begin with --enable or --disable. Because of the way that config- + ure works, --enable and --disable always come in pairs, so the comple- + mentary option always exists as well, but as it specifies the default, + it is not described. Options that specify values have names that start + with --with. At the end of a configure run, a summary of the configura- + tion is output. + + +BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES + + By default, a library called libpcre2-8 is built, containing functions + that take string arguments contained in arrays of bytes, interpreted + either as single-byte characters, or UTF-8 strings. You can also build + two other libraries, called libpcre2-16 and libpcre2-32, which process + strings that are contained in arrays of 16-bit and 32-bit code units, + respectively. These can be interpreted either as single-unit characters + or UTF-16/UTF-32 strings. 
To build these additional libraries, add one + or both of the following to the configure command: + + --enable-pcre2-16 + --enable-pcre2-32 + + If you do not want the 8-bit library, add + + --disable-pcre2-8 + + as well. At least one of the three libraries must be built. Note that + the POSIX wrapper is for the 8-bit library only, and that pcre2grep is + an 8-bit program. Neither of these are built if you select only the + 16-bit or 32-bit libraries. + + +BUILDING SHARED AND STATIC LIBRARIES + + The Autotools PCRE2 building process uses libtool to build both shared + and static libraries by default. You can suppress an unwanted library + by adding one of + + --disable-shared + --disable-static + + to the configure command. Setting --disable-shared ensures that PCRE2 + libraries are built as static libraries. The binaries that are then + created as part of the build process (for example, pcre2test and + pcre2grep) are linked statically with one or more PCRE2 libraries, but + may also be dynamically linked with other libraries such as libc. If + you want these binaries to be fully statically linked, you can set LD- + FLAGS like this: + + LDFLAGS=--static ./configure --disable-shared + + Note the two hyphens in --static. Of course, this works only if static + versions of all the relevant libraries are available for linking. + + +UNICODE AND UTF SUPPORT + + By default, PCRE2 is built with support for Unicode and UTF character + strings. To build it without Unicode support, add + + --disable-unicode + + to the configure command. This setting applies to all three libraries. + It is not possible to build one library with Unicode support and an- + other without in the same configuration. + + Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, + UTF-16 or UTF-32. To do that, applications that use the library can set + the PCRE2_UTF option when they call pcre2_compile() to compile a pat- + tern. 
Alternatively, patterns may be started with (*UTF) unless the + application has locked this out by setting PCRE2_NEVER_UTF. + + UTF support allows the libraries to process character code points up to + 0x10ffff in the strings that they handle. Unicode support also gives + access to the Unicode properties of characters, using pattern escapes + such as \P, \p, and \X. Only the general category properties such as Lu + and Nd, script names, and some bi-directional properties are supported. + Details are given in the pcre2pattern documentation. + + Pattern escapes such as \d and \w do not by default make use of Unicode + properties. The application can request that they do by setting the + PCRE2_UCP option. Unless the application has set PCRE2_NEVER_UCP, a + pattern may also request this by starting with (*UCP). + + +DISABLING THE USE OF \C + + The \C escape sequence, which matches a single code unit, even in a UTF + mode, can cause unpredictable behaviour because it may leave the cur- + rent matching point in the middle of a multi-code-unit character. The + application can lock it out by setting the PCRE2_NEVER_BACKSLASH_C op- + tion when calling pcre2_compile(). There is also a build-time option + + --enable-never-backslash-C + + (note the upper case C) which locks out the use of \C entirely. + + +JUST-IN-TIME COMPILER SUPPORT + + Just-in-time (JIT) compiler support is included in the build by speci- + fying + + --enable-jit + + This support is available only for certain hardware architectures. If + this option is set for an unsupported architecture, a building error + occurs. If in doubt, use + + --enable-jit=auto + + which enables JIT only if the current hardware is supported. You can + check if JIT is enabled in the configuration summary that is output at + the end of a configure run. If you are enabling JIT under SELinux you + may also want to add + + --enable-jit-sealloc + + which enables the use of an execmem allocator in JIT that is compatible + with SELinux. 
This has no effect if JIT is not enabled. See the + pcre2jit documentation for a discussion of JIT usage. When JIT support + is enabled, pcre2grep automatically makes use of it, unless you add + + --disable-pcre2grep-jit + + to the configure command. + + +NEWLINE RECOGNITION + + By default, PCRE2 interprets the linefeed (LF) character as indicating + the end of a line. This is the normal newline character on Unix-like + systems. You can compile PCRE2 to use carriage return (CR) instead, by + adding + + --enable-newline-is-cr + + to the configure command. There is also an --enable-newline-is-lf op- + tion, which explicitly specifies linefeed as the newline character. + + Alternatively, you can specify that line endings are to be indicated by + the two-character sequence CRLF (CR immediately followed by LF). If you + want this, add + + --enable-newline-is-crlf + + to the configure command. There is a fourth option, specified by + + --enable-newline-is-anycrlf + + which causes PCRE2 to recognize any of the three sequences CR, LF, or + CRLF as indicating a line ending. A fifth option, specified by + + --enable-newline-is-any + + causes PCRE2 to recognize any Unicode newline sequence. The Unicode + newline sequences are the three just mentioned, plus the single charac- + ters VT (vertical tab, U+000B), FF (form feed, U+000C), NEL (next line, + U+0085), LS (line separator, U+2028), and PS (paragraph separator, + U+2029). The final option is + + --enable-newline-is-nul + + which causes NUL (binary zero) to be set as the default line-ending + character. + + Whatever default line ending convention is selected when PCRE2 is built + can be overridden by applications that use the library. At build time + it is recommended to use the standard for your operating system. + + +WHAT \R MATCHES + + By default, the sequence \R in a pattern matches any Unicode newline + sequence, independently of what has been selected as the line ending + sequence. 
If you specify + + --enable-bsr-anycrlf + + the default is changed so that \R matches only CR, LF, or CRLF. What- + ever is selected when PCRE2 is built can be overridden by applications + that use the library. + + +HANDLING VERY LARGE PATTERNS + + Within a compiled pattern, offset values are used to point from one + part to another (for example, from an opening parenthesis to an alter- + nation metacharacter). By default, in the 8-bit and 16-bit libraries, + two-byte values are used for these offsets, leading to a maximum size + for a compiled pattern of around 64 thousand code units. This is suffi- + cient to handle all but the most gigantic patterns. Nevertheless, some + people do want to process truly enormous patterns, so it is possible to + compile PCRE2 to use three-byte or four-byte offsets by adding a set- + ting such as + + --with-link-size=3 + + to the configure command. The value given must be 2, 3, or 4. For the + 16-bit library, a value of 3 is rounded up to 4. In these libraries, + using longer offsets slows down the operation of PCRE2 because it has + to load additional data when handling them. For the 32-bit library the + value is always 4 and cannot be overridden; the value of --with-link- + size is ignored. + + +LIMITING PCRE2 RESOURCE USAGE + + The pcre2_match() function increments a counter each time it goes round + its main loop. Putting a limit on this counter controls the amount of + computing resource used by a single call to pcre2_match(). The limit + can be changed at run time, as described in the pcre2api documentation. + The default is 10 million, but this can be changed by adding a setting + such as + + --with-match-limit=500000 + + to the configure command. This setting also applies to the + pcre2_dfa_match() matching function, and to JIT matching (though the + counting is done differently). + + The pcre2_match() function uses heap memory to record backtracking + points. 
The more nested backtracking points there are (that is, the + deeper the search tree), the more memory is needed. There is an upper + limit, specified in kibibytes (units of 1024 bytes). This limit can be + changed at run time, as described in the pcre2api documentation. The + default limit (in effect unlimited) is 20 million. You can change this + by a setting such as + + --with-heap-limit=500 + + which limits the amount of heap to 500 KiB. This limit applies only to + interpretive matching in pcre2_match() and pcre2_dfa_match(), which may + also use the heap for internal workspace when processing complicated + patterns. This limit does not apply when JIT (which has its own memory + arrangements) is used. + + You can also explicitly limit the depth of nested backtracking in the + pcre2_match() interpreter. This limit defaults to the value that is set + for --with-match-limit. You can set a lower default limit by adding, + for example, + + --with-match-limit-depth=10000 + + to the configure command. This value can be overridden at run time. + This depth limit indirectly limits the amount of heap memory that is + used, but because the size of each backtracking "frame" depends on the + number of capturing parentheses in a pattern, the amount of heap that + is used before the limit is reached varies from pattern to pattern. + This limit was more useful in versions before 10.30, where function re- + cursion was used for backtracking. + + As well as applying to pcre2_match(), the depth limit also controls the + depth of recursive function calls in pcre2_dfa_match(). These are used + for lookaround assertions, atomic groups, and recursion within pat- + terns. The limit does not apply to JIT matching. + + +LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS + + Lookbehind assertions in which one or more branches can match a vari- + able number of characters are supported only if there is a maximum + matching length for each top-level branch. 
There is a limit to this + maximum that defaults to 255 characters. You can alter this default by + a setting such as + + --with-max-varlookbehind=100 + + The limit can be changed at runtime by calling pcre2_set_max_varlookbe- + hind(). Lookbehind assertions in which every branch matches a fixed + number of characters (not necessarily all the same) are not constrained + by this limit. + + +CREATING CHARACTER TABLES AT BUILD TIME + + PCRE2 uses fixed tables for processing characters whose code points are + less than 256. By default, PCRE2 is built with a set of tables that are + distributed in the file src/pcre2_chartables.c.dist. These tables are + for ASCII codes only. If you add + + --enable-rebuild-chartables + + to the configure command, the distributed tables are no longer used. + Instead, a program called pcre2_dftables is compiled and run. This out- + puts the source for new set of tables, created in the default locale of + your C run-time system. This method of replacing the tables does not + work if you are cross compiling, because pcre2_dftables needs to be run + on the local host and therefore not compiled with the cross compiler. + + If you need to create alternative tables when cross compiling, you will + have to do so "by hand". There may also be other reasons for creating + tables manually. To cause pcre2_dftables to be built on the local + host, run a normal compiling command, and then run the program with the + output file as its argument, for example: + + cc src/pcre2_dftables.c -o pcre2_dftables + ./pcre2_dftables src/pcre2_chartables.c + + This builds the tables in the default locale of the local host. If you + want to specify a locale, you must use the -L option: + + LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c + + You can also specify -b (with or without -L). This causes the tables to + be written in binary instead of as source code. 
A set of binary tables + can be loaded into memory by an application and passed to pcre2_com- + pile() in the same way as tables created by calling pcre2_maketables(). + The tables are just a string of bytes, independent of hardware charac- + teristics such as endianness. This means they can be bundled with an + application that runs in different environments, to ensure consistent + behaviour. + + +USING EBCDIC CODE + + PCRE2 assumes by default that it will run in an environment where the + character code is ASCII or Unicode, which is a superset of ASCII. This + is the case for most computer operating systems. PCRE2 can, however, be + compiled to run in an 8-bit EBCDIC environment by adding + + --enable-ebcdic --disable-unicode + + to the configure command. This setting implies --enable-rebuild-charta- + bles. You should only use it if you know that you are in an EBCDIC en- + vironment (for example, an IBM mainframe operating system). + + It is not possible to support both EBCDIC and UTF-8 codes in the same + version of the library. Consequently, --enable-unicode and --enable- + ebcdic are mutually exclusive. + + The EBCDIC character that corresponds to an ASCII LF is assumed to have + the value 0x15 by default. However, in some EBCDIC environments, 0x25 + is used. In such an environment you should use + + --enable-ebcdic-nl25 + + as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR + has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and + 0x25 is not chosen as LF is made to correspond to the Unicode NEL char- + acter (which, in Unicode, is 0x85). + + The options that select newline behaviour, such as --enable-newline-is- + cr, and equivalent run-time options, refer to these character values in + an EBCDIC environment. + + +PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS + + By default pcre2grep supports the use of callouts with string arguments + within the patterns it is matching. 
There are two kinds: one that gen- + erates output using local code, and another that calls an external pro- + gram or script. If --disable-pcre2grep-callout-fork is added to the + configure command, only the first kind of callout is supported; if + --disable-pcre2grep-callout is used, all callouts are completely ig- + nored. For more details of pcre2grep callouts, see the pcre2grep docu- + mentation. + + +PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT + + By default, pcre2grep reads all files as plain text. You can build it + so that it recognizes files whose names end in .gz or .bz2, and reads + them with libz or libbz2, respectively, by adding one or both of + + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 + + to the configure command. These options naturally require that the rel- + evant libraries are installed on your system. Configuration will fail + if they are not. + + +PCRE2GREP BUFFER SIZE + + pcre2grep uses an internal buffer to hold a "window" on the file it is + scanning, in order to be able to output "before" and "after" lines when + it finds a match. The default starting size of the buffer is 20KiB. The + buffer itself is three times this size, but because of the way it is + used for holding "before" lines, the longest line that is guaranteed to + be processable is the notional buffer size. If a longer line is encoun- + tered, pcre2grep automatically expands the buffer, up to a specified + maximum size, whose default is 1MiB or the starting size, whichever is + the larger. You can change the default parameter values by adding, for + example, + + --with-pcre2grep-bufsize=51200 + --with-pcre2grep-max-bufsize=2097152 + + to the configure command. The caller of pcre2grep can override these + values by using --buffer-size and --max-buffer-size on the command + line. 
+ + +PCRE2TEST OPTION FOR LIBREADLINE SUPPORT + + If you add one of + + --enable-pcre2test-libreadline + --enable-pcre2test-libedit + + to the configure command, pcre2test is linked with the libreadline or + libedit library, respectively, and when its input is from a terminal, + it reads it using the readline() function. This provides line-editing + and history facilities. Note that libreadline is GPL-licensed, so if + you distribute a binary of pcre2test linked in this way, there may be + licensing issues. These can be avoided by linking instead with libedit, + which has a BSD licence. + + Setting --enable-pcre2test-libreadline causes the -lreadline option to + be added to the pcre2test build. In many operating environments with a + system-installed readline library this is sufficient. However, in some + environments (e.g. if an unmodified distribution version of readline is + in use), some extra configuration may be necessary. The INSTALL file + for libreadline says this: + + "Readline uses the termcap functions, but does not link with + the termcap or curses library itself, allowing applications + which link with readline the to choose an appropriate library." + + If your environment has not been set up so that an appropriate library + is automatically included, you may need to add something like + + LIBS="-lncurses" + + immediately before the configure command. + + +INCLUDING DEBUGGING CODE + + If you add + + --enable-debug + + to the configure command, additional debugging code is included in the + build. This feature is intended for use by the PCRE2 maintainers. + + +DEBUGGING WITH VALGRIND SUPPORT + + If you add + + --enable-valgrind + + to the configure command, PCRE2 will use valgrind annotations to mark + certain memory regions as unaddressable. This allows it to detect in- + valid memory accesses, and is mostly useful for debugging PCRE2 itself.
+ + +CODE COVERAGE REPORTING + + If your C compiler is gcc, you can build a version of PCRE2 that can + generate a code coverage report for its test suite. To enable this, you + must install lcov version 1.6 or above. Then specify + + --enable-coverage + + to the configure command and build PCRE2 in the usual way. + + Note that using ccache (a caching C compiler) is incompatible with code + coverage reporting. If you have configured ccache to run automatically + on your system, you must set the environment variable + + CCACHE_DISABLE=1 + + before running make to build PCRE2, so that ccache is not used. + + When --enable-coverage is used, the following additional targets are + added to the Makefile: + + make coverage + + This creates a fresh coverage report for the PCRE2 test suite. It is + equivalent to running "make coverage-reset", "make coverage-baseline", + "make check", and then "make coverage-report". + + make coverage-reset + + This zeroes the coverage counters, but does nothing else. + + make coverage-baseline + + This captures baseline coverage information. + + make coverage-report + + This creates the coverage report. + + make coverage-clean-report + + This removes the generated coverage report without cleaning the cover- + age data itself. + + make coverage-clean-data + + This removes the captured coverage data without removing the coverage + files created at compile time (*.gcno). + + make coverage-clean + + This cleans all coverage data including the generated coverage report. + For more information about code coverage, see the gcov and lcov docu- + mentation. + + +DISABLING THE Z AND T FORMATTING MODIFIERS + + The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers + in environments other than old versions of Microsoft Visual Studio when + __STDC_VERSION__ is defined and has a value greater than or equal to + 199901L (indicating support for C99).
However, there is at least one + environment that claims to be C99 but does not support these modifiers. + If + + --disable-percent-zt + + is specified, no use is made of the z or t modifiers. Instead of %td or + %zu, a suitable format is used depending on the size of long for the + platform. + + +SUPPORT FOR FUZZERS + + There is a special option for use by people who want to run fuzzing + tests on PCRE2: + + --enable-fuzz-support + + At present this applies only to the 8-bit library. If set, it causes an + extra library called libpcre2-fuzzsupport.a to be built, but not in- + stalled. This contains a single function called LLVMFuzzerTestOneIn- + put() whose arguments are a pointer to a string and the length of the + string. When called, this function tries to compile the string as a + pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the + string. + + Setting --enable-fuzz-support also causes a binary called pcre2fuz- + zcheck to be created. This is normally run under valgrind or used when + PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing + function and outputs information about what it is doing. The input + strings are specified by arguments: if an argument starts with "=" the + rest of it is a literal input string. Otherwise, it is assumed to be a + file name, and the contents of the file are the test string. + + +OBSOLETE OPTION + + In versions of PCRE2 prior to 10.30, there were two ways of handling + backtracking in the pcre2_match() function. The default was to use the + system stack, but if + + --disable-stack-for-recursion + + was set, memory on the heap was used. From release 10.30 onwards this + has changed (the stack is no longer used) and this option now does + nothing except give a warning. + + +SEE ALSO + + pcre2api(3), pcre2-config(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England.
+ + +REVISION + + Last updated: 16 April 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 16 April 2024 PCRE2BUILD(3) +------------------------------------------------------------------------------ + + +PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SYNOPSIS + + #include <pcre2.h> + + int (*pcre2_callout)(pcre2_callout_block *, void *); + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + +DESCRIPTION + + PCRE2 provides a feature called "callout", which is a means of tem- + porarily passing control to the caller of PCRE2 in the middle of pat- + tern matching. The caller of PCRE2 provides an external function by + putting its entry point in a match context (see pcre2_set_callout() in + the pcre2api documentation). + + When using the pcre2_substitute() function, an additional callout fea- + ture is available. This does a callout after each change to the subject + string and is described in the pcre2api documentation; the rest of this + document is concerned with callouts during pattern matching. + + Within a regular expression, (?C<arg>) indicates a point at which the + external function is to be called. Different callout points can be + identified by putting a number less than 256 after the letter C. The + default value is zero. Alternatively, the argument may be a delimited + string. The starting delimiter must be one of ` ' " ^ % # $ { and the + ending delimiter is the same as the start, except for {, where the end- + ing delimiter is }. If the ending delimiter is needed within the + string, it must be doubled.
For example, this pattern has two callout + points: + + (?C1)abc(?C"some ""arbitrary"" text")def + + If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, + PCRE2 automatically inserts callouts, all with number 255, before each + item in the pattern except for immediately before or after an explicit + callout. For example, if PCRE2_AUTO_CALLOUT is used with the pattern + + A(?C3)B + + it is processed as if it were + + (?C255)A(?C3)B(?C255) + + Here is a more complicated example: + + A(\d{2}|--) + + With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were + + (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) + + Notice that there is a callout before and after each parenthesis and + alternation bar. If the pattern contains a conditional group whose con- + dition is an assertion, an automatic callout is inserted immediately + before the condition. Such a callout may also be inserted explicitly, + for example: + + (?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de) + + This applies only to assertion conditions (because they are themselves + independent groups). + + Callouts can be useful for tracking the progress of pattern matching. + The pcre2test program has a pattern qualifier (/auto_callout) that sets + automatic callouts. When any callouts are present, the output from + pcre2test indicates how the pattern is being matched. This is useful + information when you are trying to optimize the performance of a par- + ticular pattern. + + +MISSING CALLOUTS + + You should be aware that, because of optimizations in the way PCRE2 + compiles and matches patterns, callouts sometimes do not happen exactly + as you might expect. + + Auto-possessification + + At compile time, PCRE2 "auto-possessifies" repeated items when it knows + that what follows cannot be part of the repeat. For example, a+[bc] is + compiled as if it were a++[bc]. 
The pcre2test output when this pattern + is compiled with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied + to the string "aaaa" is: + + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + No match + + This indicates that when matching [bc] fails, there is no backtracking + into a+ (because it is being treated as a++) and therefore the callouts + that would be taken for the backtracks do not occur. You can disable + the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to + pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In + this case, the output changes to this: + + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^^ [bc] + No match + + This time, when matching [bc] fails, the matcher backtracks into a+ and + tries again, repeatedly, until a+ itself fails. + + Automatic .* anchoring + + By default, an optimization is applied when .* is the first significant + item in a pattern. If PCRE2_DOTALL is set, so that the dot can match + any character, the pattern is automatically anchored. If PCRE2_DOTALL + is not set, a match can start only after an internal newline or at the + beginning of the subject, and pcre2_compile() remembers this. If a pat- + tern has more than one top-level branch, automatic anchoring occurs if + all branches are anchorable. + + This optimization is disabled, however, if .* is in an atomic group or + if there is a backreference to the capture group in which it appears. + It is also disabled if the pattern contains (*PRUNE) or (*SKIP). How- + ever, the presence of callouts does not affect it. + + For example, if the pattern .*\d is compiled with PCRE2_AUTO_CALLOUT + and applied to the string "aa", the pcre2test output is: + + --->aa + +0 ^ .* + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + No match + + This shows that all match attempts start at the beginning of the sub- + ject. In other words, the pattern is anchored. 
You can disable this op- + timization by passing PCRE2_NO_DOTSTAR_ANCHOR to pcre2_compile(), or + starting the pattern with (*NO_DOTSTAR_ANCHOR). In this case, the out- + put changes to: + + --->aa + +0 ^ .* + +2 ^ ^ \d + +2 ^^ \d + +2 ^ \d + +0 ^ .* + +2 ^^ \d + +2 ^ \d + No match + + This shows more match attempts, starting at the second subject charac- + ter. Another optimization, described in the next section, means that + there is no subsequent attempt to match with an empty subject. + + Other optimizations + + Other optimizations that provide fast "no match" results also affect + callouts. For example, if the pattern is + + ab(?C4)cd + + PCRE2 knows that any matching string must contain the letter "d". If + the subject string is "abyz", the lack of "d" means that matching + doesn't ever start, and the callout is never reached. However, with + "abyd", though the result is still no match, the callout is obeyed. + + For most patterns PCRE2 also knows the minimum length of a matching + string, and will immediately give a "no match" return without actually + running a match if the subject is not long enough, or, for unanchored + patterns, if it has been scanned far enough. + + You can disable these optimizations by passing the PCRE2_NO_START_OPTI- + MIZE option to pcre2_compile(), or by starting the pattern with + (*NO_START_OPT). This slows down the matching process, but does ensure + that callouts such as the example above are obeyed. + + +THE CALLOUT INTERFACE + + During matching, when PCRE2 reaches a callout point, if an external + function is provided in the match context, it is called. This applies + to both normal, DFA, and JIT matching. The first argument to the call- + out function is a pointer to a pcre2_callout block. The second argument + is the void * callout data that was supplied when the callout was set + up by calling pcre2_set_callout() (see the pcre2api documentation). 
The + callout block structure contains the following fields, not necessarily + in this order: + + uint32_t version; + uint32_t callout_number; + uint32_t capture_top; + uint32_t capture_last; + uint32_t callout_flags; + PCRE2_SIZE *offset_vector; + PCRE2_SPTR mark; + PCRE2_SPTR subject; + PCRE2_SIZE subject_length; + PCRE2_SIZE start_match; + PCRE2_SIZE current_position; + PCRE2_SIZE pattern_position; + PCRE2_SIZE next_item_length; + PCRE2_SIZE callout_string_offset; + PCRE2_SIZE callout_string_length; + PCRE2_SPTR callout_string; + + The version field contains the version number of the block format. The + current version is 2; the three callout string fields were added for + version 1, and the callout_flags field for version 2. If you are writ- + ing an application that might use an earlier release of PCRE2, you + should check the version number before accessing any of these fields. + The version number will increase in future if more fields are added, + but the intention is never to remove any of the existing fields. + + Fields for numerical callouts + + For a numerical callout, callout_string is NULL, and callout_number + contains the number of the callout, in the range 0-255. This is the + number that follows (?C for callouts that are part of the pattern; it is + 255 for automatically generated callouts. + + Fields for string callouts + + For callouts with string arguments, callout_number is always zero, and + callout_string points to the string that is contained within the com- + piled pattern. Its length is given by callout_string_length. Duplicated + ending delimiters that were present in the original pattern string have + been turned into single characters, but there is no other processing of + the callout string argument. An additional code unit containing binary + zero is present after the string, but is not included in the length. + The delimiter that was used to start the string is also stored within + the pattern, immediately before the string itself.
You can access this + delimiter as callout_string[-1] if you need it. + + The callout_string_offset field is the code unit offset to the start of + the callout argument string within the original pattern string. This is + provided for the benefit of applications such as script languages that + might need to report errors in the callout string within the pattern. + + Fields for all callouts + + The remaining fields in the callout block are the same for both kinds + of callout. + + The offset_vector field is a pointer to a vector of capturing offsets + (the "ovector"). You may read the elements in this vector, but you must + not change any of them. + + For calls to pcre2_match(), the offset_vector field is not (since re- + lease 10.30) a pointer to the actual ovector that was passed to the + matching function in the match data block. Instead it points to an in- + ternal ovector of a size large enough to hold all possible captured + substrings in the pattern. Note that whenever a recursion or subroutine + call within a pattern completes, the capturing state is reset to what + it was before. + + The capture_last field contains the number of the most recently cap- + tured substring, and the capture_top field contains one more than the + number of the highest numbered captured substring so far. If no sub- + strings have yet been captured, the value of capture_last is 0 and the + value of capture_top is 1. The values of these fields do not always + differ by one; for example, when the callout in the pattern + ((a)(b))(?C2) is taken, capture_last is 1 but capture_top is 4. + + The contents of ovector[2] to ovector[<capture_top>*2-1] can be in- + spected in order to extract substrings that have been matched so far, + in the same way as extracting substrings after a match has completed. + The values in ovector[0] and ovector[1] are always PCRE2_UNSET because + the match is by definition not complete.
Substrings that have not been + captured but whose numbers are less than capture_top also have both of + their ovector slots set to PCRE2_UNSET. + + For DFA matching, the offset_vector field points to the ovector that + was passed to the matching function in the match data block for call- + outs at the top level, but to an internal ovector during the processing + of pattern recursions, lookarounds, and atomic groups. However, these + ovectors hold no useful information because pcre2_dfa_match() does not + support substring capturing. The value of capture_top is always 1 and + the value of capture_last is always 0 for DFA matching. + + The subject and subject_length fields contain copies of the values that + were passed to the matching function. + + The start_match field normally contains the offset within the subject + at which the current match attempt started. However, if the escape se- + quence \K has been encountered, this value is changed to reflect the + modified starting point. If the pattern is not anchored, the callout + function may be called several times from the same point in the pattern + for different starting points in the subject. + + The current_position field contains the offset within the subject of + the current match pointer. + + The pattern_position field contains the offset in the pattern string to + the next item to be matched. + + The next_item_length field contains the length of the next item to be + processed in the pattern string. When the callout is at the end of the + pattern, the length is zero. When the callout precedes an opening + parenthesis, the length includes meta characters that follow the paren- + thesis. For example, in a callout before an assertion such as (?=ab) + the length is 3. For an alternation bar or a closing parenthesis, the + length is one, unless a closing parenthesis is followed by a quanti- + fier, in which case its length is included. (This changed in release + 10.23. 
In earlier releases, before an opening parenthesis the length + was that of the entire group, and before an alternation bar or a clos- + ing parenthesis the length was zero.) + + The pattern_position and next_item_length fields are intended to help + in distinguishing between different automatic callouts, which all have + the same callout number. However, they are set for all callouts, and + are used by pcre2test to show the next item to be matched when display- + ing callout information. + + In callouts from pcre2_match() the mark field contains a pointer to the + zero-terminated name of the most recently passed (*MARK), (*PRUNE), or + (*THEN) item in the match, or NULL if no such items have been passed. + Instances of (*PRUNE) or (*THEN) without a name do not obliterate a + previous (*MARK). In callouts from the DFA matching function this field + always contains NULL. + + The callout_flags field is always zero in callouts from + pcre2_dfa_match() or when JIT is being used. When pcre2_match() without + JIT is used, the following bits may be set: + + PCRE2_CALLOUT_STARTMATCH + + This is set for the first callout after the start of matching for each + new starting position in the subject. + + PCRE2_CALLOUT_BACKTRACK + + This is set if there has been a matching backtrack since the previous + callout, or since the start of matching if this is the first callout + from a pcre2_match() run. + + Both bits are set when a backtrack has caused a "bumpalong" to a new + starting position in the subject. Output from pcre2test does not indi- + cate the presence of these bits unless the callout_extra modifier is + set. + + The information in the callout_flags field is provided so that applica- + tions can track and tell their users how matching with backtracking is + done. This can be useful when trying to optimize patterns, or just to + understand how PCRE2 works. 
There is no support in pcre2_dfa_match() + because there is no backtracking in DFA matching, and there is no sup- + port in JIT because JIT is all about maximizing matching performance. + In both these cases the callout_flags field is always zero. + + +RETURN VALUES FROM CALLOUTS + + The external callout function returns an integer to PCRE2. If the value + is zero, matching proceeds as normal. If the value is greater than + zero, matching fails at the current point, but the testing of other + matching possibilities goes ahead, just as if a lookahead assertion had + failed. If the value is less than zero, the match is abandoned, and the + matching function returns the negative value. + + Negative values should normally be chosen from the set of PCRE2_ER- + ROR_xxx values. In particular, PCRE2_ERROR_NOMATCH forces a standard + "no match" failure. The error number PCRE2_ERROR_CALLOUT is reserved + for use by callout functions; it will never be used by PCRE2 itself. + + +CALLOUT ENUMERATION + + int pcre2_callout_enumerate(const pcre2_code *code, + int (*callback)(pcre2_callout_enumerate_block *, void *), + void *user_data); + + A script language that supports the use of string arguments in callouts + might like to scan all the callouts in a pattern before running the + match. This can be done by calling pcre2_callout_enumerate(). The first + argument is a pointer to a compiled pattern, the second points to a + callback function, and the third is arbitrary user data. The callback + function is called for every callout in the pattern in the order in + which they appear. Its first argument is a pointer to a callout enumer- + ation block, and its second argument is the user_data value that was + passed to pcre2_callout_enumerate().
The data block contains the fol- + lowing fields: + + version Block version number + pattern_position Offset to next item in pattern + next_item_length Length of next item in pattern + callout_number Number for numbered callouts + callout_string_offset Offset to string within pattern + callout_string_length Length of callout string + callout_string Points to callout string or is NULL + + The version number is currently 0. It will increase if new fields are + ever added to the block. The remaining fields are the same as their + namesakes in the pcre2_callout block that is used for callouts during + matching, as described above. + + Note that the value of pattern_position is unique for each callout. + However, if a callout occurs inside a group that is quantified with a + non-zero minimum or a fixed maximum, the group is replicated inside the + compiled pattern. For example, a pattern such as /(a){2}/ is compiled + as if it were /(a)(a)/. This means that the callout will be enumerated + more than once, but with the same value for pattern_position in each + case. + + The callback function should normally return zero. If it returns a non- + zero value, scanning the pattern stops, and that value is returned from + pcre2_callout_enumerate(). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 19 January 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 19 January 2024 PCRE2CALLOUT(3) +------------------------------------------------------------------------------ + + +PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +DIFFERENCES BETWEEN PCRE2 AND PERL + + This document describes some of the known differences in the ways that + PCRE2 and Perl handle regular expressions. 
The differences described + here are with respect to Perl version 5.38.0, but as both Perl and + PCRE2 are continually changing, the information may at times be out of + date. + + 1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, + the behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' + matches the next character unless it is the start of a newline se- + quence. This means that, if the newline setting is CR, CRLF, or NUL, + '.' will match the code point LF (0x0A) in ASCII/Unicode environments, + and NL (either 0x15 or 0x25) when using EBCDIC. In Perl, '.' appears + never to match LF, even when 0x0A is not a newline indicator. + + 2. PCRE2 has only a subset of Perl's Unicode support. Details of what + it does have are given in the pcre2unicode page. + + 3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized asser- + tions, but they do not mean what you might think. For example, (?!a){3} + does not assert that the next three characters are not "a". It just as- + serts that the next character is not "a" three times (in principle; + PCRE2 optimizes this to run the assertion just once). Perl allows some + repeat quantifiers on other assertions, for example, \b* , but these do + not seem to have any use. PCRE2 does not allow any kind of quantifier + on non-lookaround assertions. + + 4. If a braced quantifier such as {1,2} appears where there is nothing + to repeat (for example, at the start of a branch), PCRE2 raises an er- + ror whereas Perl treats the quantifier characters as literal. + + 5. Capture groups that occur inside negative lookaround assertions are + counted, but their entries in the offsets vector are set only when a + negative assertion is a condition that has a matching branch (that is, + the condition is false). Perl may set such capture groups in other + circumstances. + + 6. The following Perl escape sequences are not supported: \F, \l, \L, + \u, \U, and \N when followed by a character name. 
\N on its own, match- + ing a non-newline character, and \N{U+dd..}, matching a Unicode code + point, are supported. The escapes that modify the case of following + letters are implemented by Perl's general string-handling and are not + part of its pattern matching engine. If any of these are encountered by + PCRE2, an error is generated by default. However, if either of the + PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are + interpreted as ECMAScript interprets them. + + 7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 + is built with Unicode support (the default). The properties that can be + tested with \p and \P are limited to the general category properties + such as Lu and Nd, the derived properties Any and Lc (synonym L&), + script names such as Greek or Han, Bidi_Class, Bidi_Control, and a few + binary properties. Both PCRE2 and Perl support the Cs (surrogate) prop- + erty, but in PCRE2 its use is limited. See the pcre2pattern documenta- + tion for details. The long synonyms for property names that Perl sup- + ports (such as \p{Letter}) are not supported by PCRE2, nor is it per- + mitted to prefix any of these properties with "Is". + + 8. PCRE2 supports the \Q...\E escape for quoting substrings. Characters + in between are treated as literals. However, this is slightly different + from Perl in that $ and @ are also handled as literals inside the + quotes. In Perl, they cause variable interpolation (PCRE2 does not have + variables). Also, Perl does "double-quotish backslash interpolation" on + any backslashes between \Q and \E which, its documentation says, "may + lead to confusing results". PCRE2 treats a backslash between \Q and \E + just like any other character. 
Note the following examples: + + Pattern PCRE2 matches Perl matches + + \Qabc$xyz\E abc$xyz abc followed by the + contents of $xyz + \Qabc\$xyz\E abc\$xyz abc\$xyz + \Qabc\E\$\Qxyz\E abc$xyz abc$xyz + \QA\B\E A\B A\B + \Q\\E \ \\E + + The \Q...\E sequence is recognized both inside and outside character + classes by both PCRE2 and Perl. Another difference from Perl is that + any appearance of \Q or \E inside what might otherwise be a quantifier + causes PCRE2 not to recognize the sequence as a quantifier. Perl recog- + nizes a quantifier if (redundantly) either of the numbers is inside + \Q...\E, but not if the separating comma is. When not recognized as a + quantifier a sequence such as {\Q1\E,2} is treated as the literal + string "{1,2}". + + 9. Fairly obviously, PCRE2 does not support the (?{code}) and + (??{code}) constructions. However, PCRE2 does have a "callout" feature, + which allows an external function to be called during pattern matching. + See the pcre2callout documentation for details. + + 10. Subroutine calls (whether recursive or not) were treated as atomic + groups up to PCRE2 release 10.23, but from release 10.30 this changed, + and backtracking into subroutine calls is now supported, as in Perl. + + 11. In PCRE2, if any of the backtracking control verbs are used in a + group that is called as a subroutine (whether or not recursively), + their effect is confined to that group; it does not extend to the sur- + rounding pattern. This is not always the case in Perl. In particular, + if (*THEN) is present in a group that is called as a subroutine, its + action is limited to that group, even if the group does not contain any + | characters. Note that such groups are processed as anchored at the + point where they are tested. PCRE2 also confines all control verbs + within atomic assertions, again including (*THEN) in assertions with + only one branch. + + 12. 
If a pattern contains more than one backtracking control verb, the + first one that is backtracked onto acts. For example, in the pattern + A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure + in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases + it is the same as PCRE2, but there are cases where it differs. + + 13. There are some differences that are concerned with the settings of + captured strings when part of a pattern is repeated. For example, + matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 un- + set, but in PCRE2 it is set to "b". + + 14. PCRE2's handling of duplicate capture group numbers and names is + not as general as Perl's. This is a consequence of the fact that PCRE2 + works internally just with numbers, using an external table to trans- + late between numbers and names. In particular, a pattern such as + (?|(?<a>A)|(?<b>B)), where the two capture groups have the same number + but different names, is not supported, and causes an error at compile + time. If it were allowed, it would not be possible to distinguish which + group matched, because both names map to capture group number 1. To + avoid this confusing situation, an error is given at compile time. + + 15. Perl used to recognize comments in some places that PCRE2 does not, + for example, between the ( and ? at the start of a group. If the /x + modifier is set, Perl allowed white space between ( and ? though the + latest Perls give an error (for a while it was just deprecated). There + may still be some cases where Perl behaves differently. + + 16. Perl, when in warning mode, gives warnings for character classes + such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- + als. PCRE2 has no warning features, so it gives an error in these cases + because they are almost certainly user mistakes. + + 17. 
In PCRE2, until release 10.45, the upper/lower case character prop- + erties Lu and Ll were not affected when case-independent matching was + specified. Perl has changed in this respect, and PCRE2 has now changed + to match. When caseless matching is in force, Lu, Ll, and Lt (title + case) are all treated as Lc (cased letter). + + 18. From release 5.32.0, Perl locks out the use of \K in lookaround as- + sertions. From release 10.38 PCRE2 does the same by default. However, + there is an option for re-enabling the previous behaviour. When this + option is set, \K is acted on when it occurs in positive assertions, + but is ignored in negative assertions. + + 19. PCRE2 provides some extensions to the Perl regular expression fa- + cilities. Perl 5.10 included new features that were not in earlier + versions of Perl, some of which (such as named parentheses) were in + PCRE2 for some time before. This list is with respect to Perl 5.38: + + (a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the + $ meta-character matches only at the very end of the string. + + (b) A backslash followed by a letter with no special meaning is + faulted. (Perl can be made to issue a warning.) + + (c) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti- + fiers is inverted, that is, by default they are not greedy, but if fol- + lowed by a question mark they are. + + (d) PCRE2_ANCHORED can be used at matching time to force a pattern to + be tried only at the first matching position in the subject string. + + (e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and + PCRE2_NOTEMPTY_ATSTART options have no Perl equivalents. + + (f) The \R escape sequence can be restricted to match only CR, LF, or + CRLF by the PCRE2_BSR_ANYCRLF option. + + (g) The callout facility is PCRE2-specific. Perl supports codeblocks + and variable interpolation, but not general hooks on every match. + + (h) The partial matching facility is PCRE2-specific. 
+ + (i) The alternative matching function (pcre2_dfa_match()) matches in a + different way and is not Perl-compatible. + + (j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) + at the start of a pattern. These set overall options that cannot be + changed within the pattern. + + (k) PCRE2 supports non-atomic positive lookaround assertions. This is + an extension to the lookaround facilities. The default, Perl-compatible + lookarounds are atomic. + + (l) There are three syntactical items in patterns that can refer to a + capturing group by number: back references such as \g{2}, subroutine + calls such as (?3), and condition references such as (?(4)...). PCRE2 + supports relative group numbers such as +2 and -4 in all three cases. + Perl supports both plus and minus for subroutine calls, but only minus + for back references, and no relative numbering at all for conditions. + + (m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 ex- + tension that is not available in Perl. + + 20. Perl has different limits than PCRE2. See the pcre2limit documenta- + tion for details. With 5.10, Perl went from recursion to iteration, keep- + ing the intermediate matches on the heap, which is ~10% slower but does + not fall into any stack-overflow limit. PCRE2 made a similar change at + release 10.30, and also has many build-time and run-time customizable + limits. + + 21. Unlike Perl, PCRE2 doesn't have character set modifiers and espe- + cially no way to set characters by context just like Perl's "/d". A + regular expression using PCRE2_UTF and PCRE2_UCP will use similar rules + to Perl's "/u"; something closer to "/a" could be selected by adding + other PCRE2_EXTRA_ASCII* options on top. + + 22. Some recursive patterns that Perl diagnoses as infinite recursions + can be handled by PCRE2, either by the interpreter or the JIT. 
An exam- + ple is /(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number + of repeated "abcd" substrings at the end of the subject. + + 23. Both PCRE2 and Perl error when \x{ escapes are invalid, but Perl + tries to recover and prints a warning if the problem was that an in- + valid hexadecimal digit was found; since PCRE2 doesn't have warnings, it + returns an error instead. Additionally, Perl accepts \x{} and gener- + ates NUL, unlike PCRE2. + + 24. From release 10.45, PCRE2 gives an error if \x is not followed by a + hexadecimal digit or a curly bracket. It used to interpret this as the + NUL character. Perl still generates NUL, but warns when in warning mode + in most cases. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 02 October 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 02 October 2024 PCRE2COMPAT(3) +------------------------------------------------------------------------------ + + +PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 JUST-IN-TIME COMPILER SUPPORT + + Just-in-time compiling is a heavyweight optimization that can greatly + speed up pattern matching. However, it comes at the cost of extra pro- + cessing before the match is performed, so it is of most benefit when + the same pattern is going to be matched many times. This does not nec- + essarily mean many calls of a matching function; if the pattern is not + anchored, matching attempts may take place many times at various posi- + tions in the subject, even for a single call. Therefore, if the subject + string is very long, it may still pay to use JIT even for one-off + matches. JIT support is available for all of the 8-bit, 16-bit and + 32-bit PCRE2 libraries. + + JIT support applies only to the traditional Perl-compatible matching + function. 
It does not apply when the DFA matching function is being + used. The code for JIT support was written by Zoltan Herczeg. + + +AVAILABILITY OF JIT SUPPORT + + JIT support is an optional feature of PCRE2. The "configure" option + --enable-jit (or equivalent CMake option) must be set when PCRE2 is + built if you want to use JIT. The support is limited to the following + hardware platforms: + + ARM 32-bit (v7, and Thumb2) + ARM 64-bit + IBM s390x 64 bit + Intel x86 32-bit and 64-bit + LoongArch 64 bit + MIPS 32-bit and 64-bit + Power PC 32-bit and 64-bit + RISC-V 32-bit and 64-bit + + If --enable-jit is set on an unsupported platform, compilation fails. + + A client program can tell if JIT support has been compiled by calling + pcre2_config() with the PCRE2_CONFIG_JIT option. The result is one if + PCRE2 was built with JIT support, and zero otherwise. However, having + the JIT code available does not guarantee that it will be used for any + particular match. One reason for this is that there are a number of op- + tions and pattern items that are not supported by JIT (see below). An- + other reason is that in some environments JIT is unable to get exe- + cutable memory in which to build its compiled code. The only guarantee + from pcre2_config() is that if it returns zero, JIT will definitely not + be used. + + As of release 10.45 there is a more informative way to test for JIT + support. If pcre2_jit_compile() is called with the single option + PCRE2_JIT_TEST_ALLOC it returns zero if JIT is available and has a + working allocator. Otherwise it returns PCRE2_ERROR_NOMEMORY if JIT is + available but cannot allocate executable memory, or PCRE2_ERROR_JIT_UN- + SUPPORTED if JIT support is not compiled. The code argument is ignored, + so it can be a NULL value. + + A simple program does not need to check availability in order to use + JIT when possible. 
The API is implemented in a way that falls back to + the interpretive code if JIT is not available or cannot be used for a + given match. For programs that need the best possible performance, + there is a "fast path" API that is JIT-specific. + + +SIMPLE USE OF JIT + + To make use of the JIT support in the simplest way, all you have to do + is to call pcre2_jit_compile() after successfully compiling a pattern + with pcre2_compile(). This function has two arguments: the first is the + compiled pattern pointer that was returned by pcre2_compile(), and the + second is zero or more of the following option bits: PCRE2_JIT_COM- + PLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. + + If JIT support is not available, a call to pcre2_jit_compile() does + nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled + pattern is passed to the JIT compiler, which turns it into machine code + that executes much faster than the normal interpretive code, but yields + exactly the same results. The returned value from pcre2_jit_compile() + is zero on success, or a negative error code. + + There is a limit to the size of pattern that JIT supports, imposed by + the size of machine stack that it uses. The exact rules are not docu- + mented because they may change at any time, in particular, when new op- + timizations are introduced. If a pattern is too big, a call to + pcre2_jit_compile() returns PCRE2_ERROR_NOMEMORY. + + PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com- + plete matches. If you want to run partial matches using the PCRE2_PAR- + TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should + set one or both of the other options as well as, or instead of + PCRE2_JIT_COMPLETE. The JIT compiler generates different optimized code + for each of the three modes (normal, soft partial, hard partial). When + pcre2_match() is called, the appropriate code is run if it is avail- + able. 
Otherwise, the pattern is matched using interpretive code. + + You can call pcre2_jit_compile() multiple times for the same compiled + pattern. It does nothing if it has previously compiled code for any of + the option bits. For example, you can call it once with PCRE2_JIT_COM- + PLETE and (perhaps later, when you find you need partial matching) + again with PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it + will ignore PCRE2_JIT_COMPLETE and just compile code for partial match- + ing. If pcre2_jit_compile() is called with no option bits set, it imme- + diately returns zero. This is an alternative way of testing whether JIT + support has been compiled. + + At present, it is not possible to free JIT compiled code except when + the entire compiled pattern is freed by calling pcre2_code_free(). + + In some circumstances you may need to call additional functions. These + are described in the section entitled "Controlling the JIT stack" be- + low. + + There are some pcre2_match() options that are not supported by JIT, and + there are also some pattern items that JIT cannot handle. Details are + given below. In both cases, matching automatically falls back to the + interpretive code. If you want to know whether JIT was actually used + for a particular match, you should arrange for a JIT callback function + to be set up as described in the section entitled "Controlling the JIT + stack" below, even if you do not need to supply a non-default JIT + stack. Such a callback function is called whenever JIT code is about to + be obeyed. If the match-time options are not right for JIT execution, + the callback function is not obeyed. + + If the JIT compiler finds an unsupported item, no JIT data is gener- + ated. You can find out if JIT compilation was successful for a compiled + pattern by calling pcre2_pattern_info() with the PCRE2_INFO_JITSIZE op- + tion. A non-zero result means that JIT compilation was successful. 
A + result of 0 means that JIT support is not available, or the pattern was + not processed by pcre2_jit_compile(), or the JIT compiler was not able + to handle the pattern. Successful JIT compilation does not, however, + guarantee the use of JIT at match time because there are some match + time options that are not supported by JIT. + + +MATCHING SUBJECTS CONTAINING INVALID UTF + + When a pattern is compiled with the PCRE2_UTF option, subject strings + are normally expected to be a valid sequence of UTF code units. By de- + fault, this is checked at the start of matching and an error is gener- + ated if invalid UTF is detected. The PCRE2_NO_UTF_CHECK option can be + passed to pcre2_match() to skip the check (for improved performance) if + you are sure that a subject string is valid. If this option is used + with an invalid string, the result is undefined. The calling program + may crash or loop or otherwise misbehave. + + However, a way of running matches on strings that may contain invalid + UTF sequences is available. Calling pcre2_compile() with the + PCRE2_MATCH_INVALID_UTF option has two effects: it tells the inter- + preter in pcre2_match() to support invalid UTF, and, if pcre2_jit_com- + pile() is subsequently called, the compiled JIT code also supports in- + valid UTF. Details of how this support works, in both the JIT and the + interpretive cases, are given in the pcre2unicode documentation. + + There is also an obsolete option for pcre2_jit_compile() called + PCRE2_JIT_INVALID_UTF, which currently exists only for backward compat- + ibility. It is superseded by the pcre2_compile() option + PCRE2_MATCH_INVALID_UTF and should no longer be used. It may be removed + in future. + + +UNSUPPORTED OPTIONS AND PATTERN ITEMS + + The pcre2_match() options that are supported for JIT matching are + PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, + PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and + PCRE2_PARTIAL_SOFT. 
The PCRE2_ANCHORED and PCRE2_ENDANCHORED options + are not supported at match time. + + If the PCRE2_NO_JIT option is passed to pcre2_match() it disables the + use of JIT, forcing matching by the interpreter code. + + The only unsupported pattern items are \C (match a single data unit) + when running in a UTF mode, and a callout immediately before an asser- + tion condition in a conditional group. + + +RETURN VALUES FROM JIT MATCHING + + When a pattern is matched using JIT, the return values are the same as + those given by the interpretive pcre2_match() code, with the addition + of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the + memory used for the JIT stack was insufficient. See "Controlling the + JIT stack" below for a discussion of JIT stack usage. + + The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if + searching a very large pattern tree goes on for too long, as it is in + the same circumstance when JIT is not used, but the details of exactly + what is counted are not the same. The PCRE2_ERROR_DEPTHLIMIT error code + is never returned when JIT matching is used. + + +CONTROLLING THE JIT STACK + + When the compiled JIT code runs, it needs a block of memory to use as a + stack. By default, it uses 32KiB on the machine stack. However, some + large or complicated patterns need more than this. The error PCRE2_ER- + ROR_JIT_STACKLIMIT is given when there is not enough stack. Three func- + tions are provided for managing blocks of memory for use as JIT stacks. + There is further discussion about the use of JIT stacks in the section + entitled "JIT stack FAQ" below. + + The pcre2_jit_stack_create() function creates a JIT stack. Its argu- + ments are a starting size, a maximum size, and a general context (for + memory allocation functions, or NULL for standard memory allocation). + It returns a pointer to an opaque structure of type pcre2_jit_stack, or + NULL if there is an error. 
The pcre2_jit_stack_free() function is used + to free a stack that is no longer needed. If its argument is NULL, this + function returns immediately, without doing anything. (For the techni- + cally minded: the address space is allocated by mmap or VirtualAlloc.) + A maximum stack size of 512KiB to 1MiB should be more than enough for + any pattern. + + The pcre2_jit_stack_assign() function specifies which stack JIT code + should use. Its arguments are as follows: + + pcre2_match_context *mcontext + pcre2_jit_callback callback + void *data + + The first argument is a pointer to a match context. When this is subse- + quently passed to a matching function, its information determines which + JIT stack is used. If this argument is NULL, the function returns imme- + diately, without doing anything. There are three cases for the values + of the other two options: + + (1) If callback is NULL and data is NULL, an internal 32KiB block + on the machine stack is used. This is the default when a match + context is created. + + (2) If callback is NULL and data is not NULL, data must be + a pointer to a valid JIT stack, the result of calling + pcre2_jit_stack_create(). + + (3) If callback is not NULL, it must point to a function that is + called with data as an argument at the start of matching, in + order to set up a JIT stack. If the return from the callback + function is NULL, the internal 32KiB stack is used; otherwise the + return value must be a valid JIT stack, the result of calling + pcre2_jit_stack_create(). + + A callback function is obeyed whenever JIT code is about to be run; it + is not obeyed when pcre2_match() is called with options that are incom- + patible for JIT matching. A callback function can therefore be used to + determine whether a match operation was executed by JIT or by the in- + terpreter. 
+ + You may safely use the same JIT stack for more than one pattern (either + by assigning directly or by callback), as long as the patterns are + matched sequentially in the same thread. Currently, the only way to set + up non-sequential matches in one thread is to use callouts: if a call- + out function starts another match, that match must use a different JIT + stack to the one used for currently suspended match(es). + + In a multithread application, if you do not specify a JIT stack, or if + you assign or pass back NULL from a callback, that is thread-safe, be- + cause each thread has its own machine stack. However, if you assign or + pass back a non-NULL JIT stack, this must be a different stack for each + thread so that the application is thread-safe. + + Strictly speaking, even more is allowed. You can assign the same non- + NULL stack to a match context that is used by any number of patterns, + as long as they are not used for matching by multiple threads at the + same time. For example, you could use the same stack in all compiled + patterns, with a global mutex in the callback to wait until the stack + is available for use. However, this is an inefficient solution, and not + recommended. + + This is a suggestion for how a multithreaded program that needs to set + up non-default JIT stacks might operate: + + During thread initialization + thread_local_var = pcre2_jit_stack_create(...) + + During thread exit + pcre2_jit_stack_free(thread_local_var) + + Use a one-line callback function + return thread_local_var + + All the functions described in this section do nothing if JIT is not + available. + + +JIT STACK FAQ + + (1) Why do we need JIT stacks? + + PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack + where the local data of the current node is pushed before checking its + child nodes. Allocating real machine stack on some platforms is diffi- + cult. 
For example, the stack chain needs to be updated every time if we + extend the stack on PowerPC. Although it is possible, its updating + time overhead decreases performance. So we do the recursion in memory. + + (2) Why don't we simply allocate blocks of memory with malloc()? + + Modern operating systems have a nice feature: they can reserve an ad- + dress space instead of allocating memory. We can safely allocate memory + pages inside this address space, so the stack could grow without moving + memory data (this is important because of pointers). Thus we can allo- + cate 1MiB address space, and use only a single memory page (usually + 4KiB) if that is enough. However, we can still grow up to 1MiB anytime + if needed. + + (3) Who "owns" a JIT stack? + + The owner of the stack is the user program, not the JIT studied pattern + or anything else. The user program must ensure that if a stack is being + used by pcre2_match(), (that is, it is assigned to a match context that + is passed to the pattern currently running), that stack must not be + used by any other threads (to avoid overwriting the same memory area). + The best practice for multithreaded programs is to allocate a stack for + each thread, and return this stack through the JIT callback function. + + (4) When should a JIT stack be freed? + + You can free a JIT stack at any time, as long as it will not be used by + pcre2_match() again. When you assign the stack to a match context, only + a pointer is set. There is no reference counting or any other magic. + You can free compiled patterns, contexts, and stacks in any order, any- + time. Just do not call pcre2_match() with a match context pointing to + an already freed stack, as that will cause SEGFAULT. (Also, do not free + a stack currently used by pcre2_match() in another thread). You can + also replace the stack in a context at any time when it is not in use. + You should free the previous stack before assigning a replacement. 
+ + (5) Should I allocate/free a stack every time before/after calling + pcre2_match()? + + No, because this is too costly in terms of resources. However, you + could implement some clever idea that releases the stack if it is not + used in, let's say, two minutes. The JIT callback can help to achieve + this without keeping a list of patterns. + + (6) OK, the stack is for long term memory allocation. But what happens + if a pattern causes stack overflow with a stack of 1MiB? Is that 1MiB + kept until the stack is freed? + + Especially on embedded systems, it might be a good idea to release mem- + ory sometimes without freeing the stack. There is no API for this at + the moment. Probably a function call which returns with the currently + allocated memory for any stack and another which allows releasing mem- + ory (shrinking the stack) would be a good idea if someone needs this. + + (7) This is too much of a headache. Isn't there any better solution for + JIT stack handling? + + No, thanks to Windows. If POSIX threads were used everywhere, we could + throw out this complicated API. + + +FREEING JIT SPECULATIVE MEMORY + + void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); + + The JIT executable allocator does not free all memory when it is possi- + ble. It expects new allocations, and keeps some free memory around to + improve allocation speed. However, in low memory conditions, it might + be better to free all possible memory. You can cause this to happen by + calling pcre2_jit_free_unused_memory(). Its argument is a general con- + text, for custom memory management, or NULL for standard memory manage- + ment. + + +EXAMPLE CODE + + This is a single-threaded example that specifies a JIT stack without + using a callback. A real program should include error checking after + all the function calls. 
+ + int rc; + pcre2_code *re; + pcre2_match_data *match_data; + pcre2_match_context *mcontext; + pcre2_jit_stack *jit_stack; + + re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, + &errornumber, &erroffset, NULL); + rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); + mcontext = pcre2_match_context_create(NULL); + jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL); + pcre2_jit_stack_assign(mcontext, NULL, jit_stack); + match_data = pcre2_match_data_create(re, 10); + rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext); + /* Process result */ + + pcre2_code_free(re); + pcre2_match_data_free(match_data); + pcre2_match_context_free(mcontext); + pcre2_jit_stack_free(jit_stack); + + +JIT FAST PATH API + + Because the API described above falls back to interpreted matching when + JIT is not available, it is convenient for programs that are written + for general use in many environments. However, calling JIT via + pcre2_match() does have a performance impact. Programs that are written + for use where JIT is known to be available, and which need the best + possible performance, can instead use a "fast path" API to call JIT + matching directly instead of calling pcre2_match() (obviously only for + patterns that have been successfully processed by pcre2_jit_compile()). + + The fast path function is called pcre2_jit_match(), and it takes ex- + actly the same arguments as pcre2_match(). However, the subject string + must be specified with a length; PCRE2_ZERO_TERMINATED is not sup- + ported. Unsupported option bits (for example, PCRE2_ANCHORED and + PCRE2_ENDANCHORED) are ignored, as is the PCRE2_NO_JIT option. The re- + turn values are also the same as for pcre2_match(), plus PCRE2_ER- + ROR_JIT_BADOPTION if a matching mode (partial or complete) is requested + that was not compiled. + + When you call pcre2_match(), as well as testing for invalid options, a + number of other sanity checks are performed on the arguments. 
For exam- + ple, if the subject pointer is NULL but the length is non-zero, an im- + mediate error is given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF + subject string is tested for validity. In the interests of speed, these + checks do not happen on the JIT fast path. If invalid UTF data is + passed when PCRE2_MATCH_INVALID_UTF was not set for pcre2_compile(), + the result is undefined. The program may crash or loop or give wrong + results. In the absence of PCRE2_MATCH_INVALID_UTF you should call + pcre2_jit_match() in UTF mode only if you are sure the subject is + valid. + + Bypassing the sanity checks and the pcre2_match() wrapping can give + speedups of more than 10%. + + +SEE ALSO + + pcre2api(3), pcre2unicode(3) + + +AUTHOR + + Philip Hazel (FAQ by Zoltan Herczeg) + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 22 August 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 22 August 2024 PCRE2JIT(3) +------------------------------------------------------------------------------ + + +PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SIZE AND OTHER LIMITATIONS + + There are some size limitations in PCRE2 but it is hoped that they will + never in practice be relevant. + + The maximum size of a compiled pattern is approximately 64 thousand + code units for the 8-bit and 16-bit libraries if PCRE2 is compiled with + the default internal linkage size, which is 2 bytes for these li- + braries. If you want to process regular expressions that are truly + enormous, you can compile PCRE2 with an internal linkage size of 3 or 4 + (when building the 16-bit library, 3 is rounded up to 4). See the + README file in the source distribution and the pcre2build documentation + for details. In these cases the limit is substantially larger. How- + ever, the speed of execution is slower. 
In the 32-bit library, the in- + ternal linkage size is always 4. + + The maximum length of a source pattern string is essentially unlimited; + it is the largest number a PCRE2_SIZE variable can hold. However, the + program that calls pcre2_compile() can specify a smaller limit. + + The maximum length (in code units) of a subject string is one less than + the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an un- + signed integer type, usually defined as size_t. Its maximum value (that + is ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-termi- + nated strings and unset offsets. + + All values in repeating quantifiers must be less than 65536. + + There are two different limits that apply to branches of lookbehind as- + sertions. If every branch in such an assertion matches a fixed number + of characters, the maximum length of any branch is 65535 characters. If + any branch matches a variable number of characters, then the maximum + matching length for every branch is limited. The default limit is set + at compile time, defaulting to 255, but can be changed by the calling + program. + + There is no limit to the number of parenthesized groups, but there can + be no more than 65535 capture groups, and there is a limit to the depth + of nesting of parenthesized subpatterns of all kinds. This is imposed + in order to limit the amount of system stack used at compile time. The + default limit can be specified when PCRE2 is built; if not, the default + is set to 250. An application can change this limit by calling + pcre2_set_parens_nest_limit() to set the limit in a compile context. + + The maximum length of name for a named capture group is 32 code units, + and the maximum number of such groups is 10000. + + The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or + (*THEN) verb is 255 code units for the 8-bit library and 65535 code + units for the 16-bit and 32-bit libraries. 
+ + The maximum length of a string argument to a callout is the largest + number a 32-bit unsigned integer can hold. + + The maximum amount of heap memory used for matching is controlled by + the heap limit, which can be set in a pattern or in a match context. + The default is a very large number, effectively unlimited. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 16 August 2023 + Copyright (c) 1997-2023 University of Cambridge. + + +PCRE2 10.45 16 August 2023 PCRE2LIMITS(3) +------------------------------------------------------------------------------ + + +PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 MATCHING ALGORITHMS + + This document describes the two different algorithms that are available + in PCRE2 for matching a compiled regular expression against a given + subject string. The "standard" algorithm is the one provided by the + pcre2_match() function. This works in the same way as Perl's matching func- + tion, and provides a Perl-compatible matching operation. The just-in- + time (JIT) optimization that is described in the pcre2jit documentation + is compatible with this function. + + An alternative algorithm is provided by the pcre2_dfa_match() function; + it operates in a different way, and is not Perl-compatible. This alter- + native has advantages and disadvantages compared with the standard al- + gorithm, and these are described below. + + When there is only one possible way in which a given subject string can + match a pattern, the two algorithms give the same answer. A difference + arises, however, when there are multiple possibilities. For example, if + the anchored pattern + + ^<.*> + + is matched against the string + + <something> <something else> <something further> + + there are three possible answers. The standard algorithm finds only one + of them, whereas the alternative algorithm finds all three. 
+ + +REGULAR EXPRESSIONS AS TREES + + The set of strings that are matched by a regular expression can be rep- + resented as a tree structure. An unlimited repetition in the pattern + makes the tree of infinite size, but it is still a tree. Matching the + pattern to a given subject string (from a given starting point) can be + thought of as a search of the tree. There are two ways to search a + tree: depth-first and breadth-first, and these correspond to the two + matching algorithms provided by PCRE2. + + +THE STANDARD MATCHING ALGORITHM + + In the terminology of Jeffrey Friedl's book "Mastering Regular Expres- + sions", the standard algorithm is an "NFA algorithm". It conducts a + depth-first search of the pattern tree. That is, it proceeds along a + single path through the tree, checking that the subject matches what is + required. When there is a mismatch, the algorithm tries any alterna- + tives at the current point, and if they all fail, it backs up to the + previous branch point in the tree, and tries the next alternative + branch at that level. This often involves backing up (moving to the + left) in the subject string as well. The order in which repetition + branches are tried is controlled by the greedy or ungreedy nature of + the quantifier. + + If a leaf node is reached, a matching string has been found, and at + that point the algorithm stops. Thus, if there is more than one possi- + ble match, this algorithm returns the first one that it finds. Whether + this is the shortest, the longest, or some intermediate length depends + on the way the alternations and the greedy or ungreedy repetition quan- + tifiers are specified in the pattern. + + Because it ends up with a single path through the tree, it is rela- + tively straightforward for this algorithm to keep track of the sub- + strings that are matched by portions of the pattern in parentheses. + This provides support for capturing parentheses and backreferences. 
+ + +THE ALTERNATIVE MATCHING ALGORITHM + + This algorithm conducts a breadth-first search of the tree. Starting + from the first matching point in the subject, it scans the subject + string from left to right, once, character by character, and as it does + this, it remembers all the paths through the tree that represent valid + matches. In Friedl's terminology, this is a kind of "DFA algorithm", + though it is not implemented as a traditional finite state machine (it + keeps multiple states active simultaneously). + + Although the general principle of this matching algorithm is that it + scans the subject string only once, without backtracking, there is one + exception: when a lookaround assertion is encountered, the characters + following or preceding the current point have to be independently in- + spected. + + The scan continues until either the end of the subject is reached, or + there are no more unterminated paths. At this point, terminated paths + represent the different matching possibilities (if there are none, the + match has failed). Thus, if there is more than one possible match, + this algorithm finds all of them, and in particular, it finds the + longest. The matches are returned in the output vector in decreasing + order of length. There is an option to stop the algorithm after the + first match (which is necessarily the shortest) is found. + + Note that the size of vector needed to contain all the results depends + on the number of simultaneous matches, not on the number of capturing + parentheses in the pattern. Using pcre2_match_data_create_from_pat- + tern() to create the match data block is therefore not advisable when + doing DFA matching. + + Note also that all the matches that are found start at the same point + in the subject. If the pattern + + cat(er(pillar)?)? 
+ + is matched against the string "the caterpillar catchment", the result + is the three strings "caterpillar", "cater", and "cat" that start at + the fifth character of the subject. The algorithm does not automati- + cally move on to find matches that start at later positions. + + PCRE2's "auto-possessification" optimization usually applies to charac- + ter repeats at the end of a pattern (as well as internally). For exam- + ple, the pattern "a\d+" is compiled as if it were "a\d++" because there + is no point even considering the possibility of backtracking into the + repeated digits. For DFA matching, this means that only one possible + match is found. If you really do want multiple matches in such cases, + either use an ungreedy repeat ("a\d+?") or set the PCRE2_NO_AUTO_POS- + SESS option when compiling. + + There are a number of features of PCRE2 regular expressions that are + not supported or behave differently in the alternative matching func- + tion. Those that are not supported cause an error if encountered. + + 1. Because the algorithm finds all possible matches, the greedy or un- + greedy nature of repetition quantifiers is not relevant (though it may + affect auto-possessification, as just described). During matching, + greedy and ungreedy quantifiers are treated in exactly the same way. + However, possessive quantifiers can make a difference when what follows + could also match what is quantified, for example in a pattern like + this: + + ^a++\w! + + This pattern matches "aaab!" but not "aaa!", which would be matched by + a non-possessive quantifier. Similarly, if an atomic group is present, + it is matched as if it were a standalone pattern at the current point, + and the longest match is then "locked in" for the rest of the overall + pattern. + + 2. 
When dealing with multiple paths through the tree simultaneously, it + is not straightforward to keep track of captured substrings for the + different matching possibilities, and PCRE2's implementation of this + algorithm does not attempt to do this. This means that no captured sub- + strings are available. + + 3. Because no substrings are captured, a number of related features are + not available: + + (a) Backreferences; + + (b) Conditional expressions that use a backreference as the condition + or test for a specific group recursion; + + (c) Script runs; + + (d) Scan substring assertions. + + 4. Because many paths through the tree may be active, the \K escape se- + quence, which resets the start of the match when encountered (but may + be on some paths and not on others), is not supported. + + 5. Callouts are supported, but the value of the capture_top field is + always 1, and the value of the capture_last field is always 0. + + 6. The \C escape sequence, which (in the standard algorithm) always + matches a single code unit, even in a UTF mode, is not supported in UTF + modes because the alternative algorithm moves through the subject + string one character (not code unit) at a time, for all active paths + through the tree. + + 7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) + are not supported. (*FAIL) is supported, and behaves like a failing + negative assertion. + + 8. The PCRE2_MATCH_INVALID_UTF option for pcre2_compile() is not sup- + ported by pcre2_dfa_match(). + + +ADVANTAGES OF THE ALTERNATIVE ALGORITHM + + The main advantage of the alternative algorithm is that all possible + matches (at a single point in the subject) are automatically found, and + in particular, the longest match is found. To find more than one match + at the same point using the standard algorithm, you have to do kludgy + things with callouts. + + Partial matching is possible with this algorithm, though it has some + limitations. 
The pcre2partial documentation gives details of partial + matching and discusses multi-segment matching. + + +DISADVANTAGES OF THE ALTERNATIVE ALGORITHM + + The alternative algorithm suffers from a number of disadvantages: + + 1. It is substantially slower than the standard algorithm. This is + partly because it has to search for all possible matches, but is also + because it is less susceptible to optimization. + + 2. Capturing parentheses and other features such as backreferences that + rely on them are not supported. + + 3. Matching within invalid UTF strings is not supported. + + 4. Although atomic groups are supported, their use does not provide the + performance advantage that it does for the standard algorithm. + + 5. JIT optimization is not supported. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 30 August 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 30 August 2024 PCRE2MATCHING(3) +------------------------------------------------------------------------------ + + +PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PARTIAL MATCHING IN PCRE2 + + In normal use of PCRE2, if there is a match up to the end of a subject + string, but more characters are needed to match the entire pattern, + PCRE2_ERROR_NOMATCH is returned, just like any other failing match. + There are circumstances where it might be helpful to distinguish this + "partial match" case. + + One example is an application where the subject string is very long, + and not all available at once. The requirement here is to be able to do + the matching segment by segment, but special action is needed when a + matched substring spans the boundary between two segments. + + Another example is checking a user input string as it is typed, to en- + sure that it conforms to a required format. 
Invalid characters can be + immediately diagnosed and rejected, giving instant feedback. + + Partial matching is a PCRE2-specific feature; it is not Perl-compati- + ble. It is requested by setting one of the PCRE2_PARTIAL_HARD or + PCRE2_PARTIAL_SOFT options when calling a matching function. The dif- + ference between the two options is whether or not a partial match is + preferred to an alternative complete match, though the details differ + between the two types of matching function. If both options are set, + PCRE2_PARTIAL_HARD takes precedence. + + If you want to use partial matching with just-in-time optimized code, + as well as setting a partial match option for the matching function, + you must also call pcre2_jit_compile() with one or both of these op- + tions: + + PCRE2_JIT_PARTIAL_HARD + PCRE2_JIT_PARTIAL_SOFT + + PCRE2_JIT_COMPLETE should also be set if you are going to run non-par- + tial matches on the same pattern. Separate code is compiled for each + mode. If the appropriate JIT mode has not been compiled, interpretive + matching code is used. + + Setting a partial matching option disables two of PCRE2's standard op- + timization hints. PCRE2 remembers the last literal code unit in a pat- + tern, and abandons matching immediately if it is not present in the + subject string. This optimization cannot be used for a subject string + that might match only partially. PCRE2 also remembers a minimum length + of a matching string, and does not bother to run the matching function + on shorter strings. This optimization is also disabled for partial + matching. + + +REQUIREMENTS FOR A PARTIAL MATCH + + A possible partial match occurs during matching when the end of the + subject string is reached successfully, but either more characters are + needed to complete the match, or the addition of more characters might + change what is matched. 
+ + Example 1: if the pattern is /abc/ and the subject is "ab", more char- + acters are definitely needed to complete a match. In this case both + hard and soft matching options yield a partial match. + + Example 2: if the pattern is /ab+/ and the subject is "ab", a complete + match can be found, but the addition of more characters might change + what is matched. In this case, only PCRE2_PARTIAL_HARD returns a par- + tial match; PCRE2_PARTIAL_SOFT returns the complete match. + + On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if + the next pattern item is \z, \Z, \b, \B, or $ there is always a partial + match. Otherwise, for both options, the next pattern item must be one + that inspects a character, and at least one of the following must be + true: + + (1) At least one character has already been inspected. An inspected + character need not form part of the final matched string; lookbehind + assertions and the \K escape sequence provide ways of inspecting char- + acters before the start of a matched string. + + (2) The pattern contains one or more lookbehind assertions. This condi- + tion exists in case there is a lookbehind that inspects characters be- + fore the start of the match. + + (3) There is a special case when the whole pattern can match an empty + string. When the starting point is at the end of the subject, the + empty string match is a possibility, and if PCRE2_PARTIAL_SOFT is set + and neither of the above conditions is true, it is returned. However, + because adding more characters might result in a non-empty match, + PCRE2_PARTIAL_HARD returns a partial match, which in this case means + "there is going to be a match at this point, but until some more char- + acters are added, we do not know if it will be an empty string or some- + thing longer". 
+ + +PARTIAL MATCHING USING pcre2_match() + + When a partial matching option is set, the result of calling + pcre2_match() can be one of the following: + + A successful match + A complete match has been found, starting and ending within this sub- + ject. + + PCRE2_ERROR_NOMATCH + No match can start anywhere in this subject. + + PCRE2_ERROR_PARTIAL + Adding more characters may result in a complete match that uses one + or more characters from the end of this subject. + + When a partial match is returned, the first two elements in the ovector + point to the portion of the subject that was matched, but the values in + the rest of the ovector are undefined. The appearance of \K in the pat- + tern has no effect for a partial match. Consider this pattern: + + /abc\K123/ + + If it is matched against "456abc123xyz" the result is a complete match, + and the ovector defines the matched string as "123", because \K resets + the "start of match" point. However, if a partial match is requested + and the subject string is "456abc12", a partial match is found for the + string "abc12", because all these characters are needed for a subse- + quent re-match with additional characters. + + If there is more than one partial match, the first one that was found + provides the data that is returned. Consider this pattern: + + /123\w+X|dogY/ + + If this is matched against the subject string "abc123dog", both alter- + natives fail to match, but the end of the subject is reached during + matching, so PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 + and 9, identifying "123dog" as the first partial match. (In this exam- + ple, there are two partial matches, because "dog" on its own partially + matches the second alternative.) + + How a partial match is processed by pcre2_match() + + What happens when a partial match is identified depends on which of the + two partial matching options is set. 
+ + If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon + as a partial match is found, without continuing to search for possible + complete matches. This option is "hard" because it prefers an earlier + partial match over a later complete match. For this reason, the assump- + tion is made that the end of the supplied subject string is not the + true end of the available data, which is why \z, \Z, \b, \B, and $ al- + ways give a partial match. + + If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but + matching continues as normal, and other alternatives in the pattern are + tried. If no complete match can be found, PCRE2_ERROR_PARTIAL is re- + turned instead of PCRE2_ERROR_NOMATCH. This option is "soft" because it + prefers a complete match over a partial match. All the various matching + items in a pattern behave as if the subject string is potentially com- + plete; \z, \Z, and $ match at the end of the subject, as normal, and + for \b and \B the end of the subject is treated as a non-alphanumeric. + + The difference between the two partial matching options can be illus- + trated by a pattern such as: + + /dog(sbody)?/ + + This matches either "dog" or "dogsbody", greedily (that is, it prefers + the longer string if possible). If it is matched against the string + "dog" with PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". + However, if PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PAR- + TIAL. On the other hand, if the pattern is made ungreedy the result is + different: + + /dog(sbody)??/ + + In this case the result is always a complete match because that is + found first, and matching never continues after finding a complete + match. 
It might be easier to follow this explanation by thinking of the + two patterns like this: + + /dog(sbody)?/ is the same as /dogsbody|dog/ + /dog(sbody)??/ is the same as /dog|dogsbody/ + + The second pattern will never match "dogsbody", because it will always + find the shorter match first. + + Example of partial matching using pcre2test + + The pcre2test data modifiers partial_hard (or ph) and partial_soft (or + ps) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT, respectively, when + calling pcre2_match(). Here is a run of pcre2test using a pattern that + matches the whole subject in the form of a date: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 25dec3\=ph + Partial match: 25dec3 + data> 3ju\=ph + Partial match: 3ju + data> 3juj\=ph + No match + + This example gives the same results for both hard and soft partial + matching options. Here is an example where there is a difference: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 25jun04\=ps + 0: 25jun04 + 1: jun + data> 25jun04\=ph + Partial match: 25jun04 + + With PCRE2_PARTIAL_SOFT, the subject is matched completely. For + PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, + so there is only a partial match. + + +MULTI-SEGMENT MATCHING WITH pcre2_match() + + PCRE was not originally designed with multi-segment matching in mind. + However, over time, features (including partial matching) that make + multi-segment matching possible have been added. A very long string can + be searched segment by segment by calling pcre2_match() repeatedly, + with the aim of achieving the same results that would happen if the en- + tire string was available for searching all the time. Normally, the + strings that are being sought are much shorter than each individual + segment, and are in the middle of very long strings, so the pattern is + normally not anchored. 
+ + Special logic must be implemented to handle a matched substring that + spans a segment boundary. PCRE2_PARTIAL_HARD should be used, because it + returns a partial match at the end of a segment whenever there is the + possibility of changing the match by adding more characters. The + PCRE2_NOTBOL option should also be set for all but the first segment. + + When a partial match occurs, the next segment must be added to the cur- + rent subject and the match re-run, using the startoffset argument of + pcre2_match() to begin at the point where the partial match started. + For example: + + re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/ + data> ...the date is 23ja\=ph + Partial match: 23ja + data> ...the date is 23jan19 and on that day...\=offset=15 + 0: 23jan19 + 1: jan + + Note the use of the offset modifier to start the new match where the + partial match was found. In this example, the next segment was added to + the one in which the partial match was found. This is the most + straightforward approach, typically using a memory buffer that is twice + the size of each segment. After a partial match, the first half of the + buffer is discarded, the second half is moved to the start of the + buffer, and a new segment is added before repeating the match as in the + example above. After a no match, the entire buffer can be discarded. + + If there are memory constraints, you may want to discard text that pre- + cedes a partial match before adding the next segment. Unfortunately, + this is not at present straightforward. In cases such as the above, + where the pattern does not contain any lookbehinds, it is sufficient to + retain only the partially matched substring. However, if the pattern + contains a lookbehind assertion, characters that precede the start of + the partial match may have been inspected during the matching process. 
+ When pcre2test displays a partial match, it indicates these characters + with '<' if the allusedtext modifier is set: + + re> "(?<=123)abc" + data> xx123ab\=ph,allusedtext + Partial match: 123ab + <<< + + However, the allusedtext modifier is not available for JIT matching, + because JIT matching does not record the first (or last) consulted + characters. For this reason, this information is not available via the + API. It is therefore not possible in general to obtain the exact number + of characters that must be retained in order to get the right match re- + sult. If you cannot retain the entire segment, you must find some + heuristic way of choosing. + + If you know the approximate length of the matching substrings, you can + use that to decide how much text to retain. The only lookbehind infor- + mation that is currently available via the API is the length of the + longest individual lookbehind in a pattern, but this can be misleading + if there are nested lookbehinds. The value returned by calling + pcre2_pattern_info() with the PCRE2_INFO_MAXLOOKBEHIND option is the + maximum number of characters (not code units) that any individual look- + behind moves back when it is processed. A pattern such as + "(?<=(? /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 23ja\=dfa,ps + Partial match: 23ja + data> n05\=dfa,dfa_restart + 0: n05 + + The first call has "23ja" as the subject, and requests partial match- + ing; the second call has "n05" as the subject for the continued + (restarted) match. Notice that when the match is complete, only the + last part is shown; PCRE2 does not retain the previously partially- + matched string. It is up to the calling program to do that if it needs + to. This means that, for an unanchored pattern, if a continued match + fails, it is not possible to try again at a new starting point. All + this facility is capable of doing is continuing with the previous match + attempt. 
For example, consider this pattern: + + 1234|3789 + + If the first part of the subject is "ABC123", a partial match of the + first alternative is found at offset 3. There is no partial match for + the second alternative, because such a match does not start at the same + point in the subject string. Attempting to continue with the string + "7890" does not yield a match because only those alternatives that + match at one point in the subject are remembered. Depending on the ap- + plication, this may or may not be what you want. + + If you do want to allow for starting again at the next character, one + way of doing it is to retain some or all of the segment and try a new + complete match, as described for pcre2_match() above. Another possibil- + ity is to work with two buffers. If a partial match at offset n in the + first buffer is followed by "no match" when PCRE2_DFA_RESTART is used + on the second buffer, you can then try a new match starting at offset + n+1 in the first buffer. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 27 November 2024 + Copyright (c) 1997-2019 University of Cambridge. + + +PCRE2 10.45 27 November 2024 PCRE2PARTIAL(3) +------------------------------------------------------------------------------ + + +PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 REGULAR EXPRESSION DETAILS + + The syntax and semantics of the regular expressions that are supported + by PCRE2 are described in detail below. There is a quick-reference syn- + tax summary in the pcre2syntax page. PCRE2 tries to match Perl syntax + and semantics as closely as it can. PCRE2 also supports some alterna- + tive regular expression syntax that does not conflict with the Perl + syntax in order to provide some compatibility with regular expressions + in Python, .NET, and Oniguruma. 
There are in addition some options that + enable alternative syntax and semantics that are not the same as in + Perl. + + Perl's regular expressions are described in its own documentation, and + regular expressions in general are covered in a number of books, some + of which have copious examples. Jeffrey Friedl's "Mastering Regular Ex- + pressions", published by O'Reilly, covers regular expressions in great + detail. This description of PCRE2's regular expressions is intended as + reference material. + + This document discusses the regular expression patterns that are sup- + ported by PCRE2 when its main matching function, pcre2_match(), is + used. PCRE2 also has an alternative matching function, + pcre2_dfa_match(), which matches using a different algorithm that is + not Perl-compatible. Some of the features discussed below are not + available when DFA matching is used. The advantages and disadvantages + of the alternative function, and how it differs from the normal func- + tion, are discussed in the pcre2matching page. + + +EBCDIC CHARACTER CODES + + Most computers use ASCII or Unicode for encoding characters, and PCRE2 + assumes this by default. However, it can be compiled to run in an envi- + ronment that uses the EBCDIC code, which is the case for some IBM main- + frame operating systems. In the sections below, character code values + are ASCII or Unicode; in an EBCDIC environment these characters may + have different code values, and there are no code points greater than + 255. Differences in behaviour when PCRE2 is running in an EBCDIC envi- + ronment are described in the section "EBCDIC environments" below, which + you can ignore unless you really are in an EBCDIC environment. + + +SPECIAL START-OF-PATTERN ITEMS + + A number of options that can be passed to pcre2_compile() can also be + set by special items at the start of a pattern. 
These are not Perl-com- + patible, but are provided to make these options accessible to pattern + writers who are not able to change the program that processes the pat- + tern. Any number of these items may appear, but they must all be to- + gether right at the start of the pattern string, and the letters must + be in upper case. + + UTF support + + In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either + as single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 + can be specified for the 32-bit library, in which case it constrains + the character values to valid Unicode code points. To process UTF + strings, PCRE2 must be built to include Unicode support (which is the + default). When using UTF strings you must either call the compiling + function with one or both of the PCRE2_UTF or PCRE2_MATCH_INVALID_UTF + options, or the pattern must start with the special sequence (*UTF), + which is equivalent to setting the PCRE2_UTF option. How setting a + UTF mode affects pattern matching is mentioned in several places below. + There is also a summary of features in the pcre2unicode page. + + Some applications that allow their users to supply patterns may wish to + restrict them to non-UTF data for security reasons. If the + PCRE2_NEVER_UTF option is passed to pcre2_compile(), (*UTF) is not al- + lowed, and its appearance in a pattern causes an error. + + Unicode property support + + Another special sequence that may appear at the start of a pattern is + (*UCP). This has the same effect as setting the PCRE2_UCP option: it + causes sequences such as \d and \w to use Unicode properties to deter- + mine character types, instead of recognizing only characters with codes + less than 256 via a lookup table. It also causes upper/lower casing op- + erations to use Unicode properties for characters with code points + greater than 127, even when UTF is not set.
These behaviours can be + changed within the pattern; see the section entitled "Internal Option + Setting" below. + + Some applications that allow their users to supply patterns may wish to + restrict them for security reasons. If the PCRE2_NEVER_UCP option is + passed to pcre2_compile(), (*UCP) is not allowed, and its appearance in + a pattern causes an error. + + Locking out empty string matching + + Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same + effect as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option + to whichever matching function is subsequently called to match the pat- + tern. These options lock out the matching of empty strings, either en- + tirely, or only at the start of the subject. + + Disabling auto-possessification + + If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as + setting the PCRE2_NO_AUTO_POSSESS option, or calling pcre2_set_opti- + mize() with a PCRE2_AUTO_POSSESS_OFF directive. This stops PCRE2 from + making quantifiers possessive when what follows cannot match the re- + peated item. For example, by default a+b is treated as a++b. For more + details, see the pcre2api documentation. + + Disabling start-up optimizations + + If a pattern starts with (*NO_START_OPT), it has the same effect as + setting the PCRE2_NO_START_OPTIMIZE option, or calling pcre2_set_opti- + mize() with a PCRE2_START_OPTIMIZE_OFF directive. This disables several + optimizations for quickly reaching "no match" results. For more de- + tails, see the pcre2api documentation. + + Disabling automatic anchoring + + If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect + as setting the PCRE2_NO_DOTSTAR_ANCHOR option, or calling pcre2_set_op- + timize() with a PCRE2_DOTSTAR_ANCHOR_OFF directive. This disables opti- + mizations that apply to patterns whose top-level branches all start + with .* (match any number of arbitrary characters). For more details, + see the pcre2api documentation. 
+ + Disabling JIT compilation + + If a pattern that starts with (*NO_JIT) is successfully compiled, an + attempt by the application to apply the JIT optimization by calling + pcre2_jit_compile() is ignored. + + Setting match resource limits + + The pcre2_match() function contains a counter that is incremented every + time it goes round its main loop. The caller of pcre2_match() can set a + limit on this counter, which therefore limits the amount of computing + resource used for a match. The maximum depth of nested backtracking can + also be limited; this indirectly restricts the amount of heap memory + that is used, but there is also an explicit memory limit that can be + set. + + These facilities are provided to catch runaway matches that are pro- + voked by patterns with huge matching trees. A common example is a pat- + tern with nested unlimited repeats applied to a long string that does + not match. When one of these limits is reached, pcre2_match() gives an + error return. The limits can also be set by items at the start of the + pattern of the form + + (*LIMIT_HEAP=d) + (*LIMIT_MATCH=d) + (*LIMIT_DEPTH=d) + + where d is any number of decimal digits. However, the value of the set- + ting must be less than the value set (or defaulted) by the caller of + pcre2_match() for it to have any effect. In other words, the pattern + writer can lower the limits set by the programmer, but not raise them. + If there is more than one setting of one of these limits, the lower + value is used. The heap limit is specified in kibibytes (units of 1024 + bytes). + + Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This + name is still recognized for backwards compatibility. + + The heap limit applies only when the pcre2_match() or pcre2_dfa_match() + interpreters are used for matching. It does not apply to JIT. 
The match + limit is used (but in a different way) when JIT is being used, or when + pcre2_dfa_match() is called, to limit computing resource usage by those + matching functions. The depth limit is ignored by JIT but is relevant + for DFA matching, which uses function recursion for recursions within + the pattern and for lookaround assertions and atomic groups. In this + case, the depth limit controls the depth of such recursion. + + Newline conventions + + PCRE2 supports six different conventions for indicating line breaks in + strings: a single CR (carriage return) character, a single LF (line- + feed) character, the two-character sequence CRLF, any of the three pre- + ceding, any Unicode newline sequence, or the NUL character (binary + zero). The pcre2api page has further discussion about newlines, and + shows how to set the newline convention when calling pcre2_compile(). + + It is also possible to specify a newline convention by starting a pat- + tern string with one of the following sequences: + + (*CR) carriage return + (*LF) linefeed + (*CRLF) carriage return, followed by linefeed + (*ANYCRLF) any of the three above + (*ANY) all Unicode newline sequences + (*NUL) the NUL character (binary zero) + + These override the default and the options given to the compiling func- + tion. For example, on a Unix system where LF is the default newline se- + quence, the pattern + + (*CR)a.b + + changes the convention to CR. That pattern matches "a\nb" because LF is + no longer a newline. If more than one of these settings is present, the + last one is used. + + The newline convention affects where the circumflex and dollar asser- + tions are true. It also affects the interpretation of the dot metachar- + acter when PCRE2_DOTALL is not set, and the behaviour of \N when not + followed by an opening brace. However, it does not affect what the \R + escape sequence matches. By default, this is any Unicode newline se- + quence, for Perl compatibility. 
However, this can be changed; see the + next section and the description of \R in the section entitled "Newline + sequences" below. A change of \R setting can be combined with a change + of newline convention. + + Specifying what \R matches + + It is possible to restrict \R to match only CR, LF, or CRLF (instead of + the complete set of Unicode line endings) by setting the option + PCRE2_BSR_ANYCRLF at compile time. This effect can also be achieved by + starting a pattern with (*BSR_ANYCRLF). For completeness, (*BSR_UNI- + CODE) is also recognized, corresponding to PCRE2_BSR_UNICODE. + + +CHARACTERS AND METACHARACTERS + + A regular expression is a pattern that is matched against a subject + string from left to right. Most characters stand for themselves in a + pattern, and match the corresponding characters in the subject. As a + trivial example, the pattern + + The quick brown fox + + matches a portion of a subject string that is identical to itself. When + caseless matching is specified (the PCRE2_CASELESS option or (?i) + within the pattern), letters are matched independently of case. Note + that there are two ASCII characters, K and S, that, in addition to + their lower case ASCII equivalents, are case-equivalent with Unicode + U+212A (Kelvin sign) and U+017F (long S) respectively when either + PCRE2_UTF or PCRE2_UCP is set, unless the PCRE2_EXTRA_CASELESS_RESTRICT + option is in force (either passed to pcre2_compile() or set by (*CASE- + LESS_RESTRICT) or (?r) within the pattern). If the PCRE2_EXTRA_TURK- + ISH_CASING option is in force (either passed to pcre2_compile() or set + by (*TURKISH_CASING) within the pattern), then the 'i' letters are + matched according to Turkish and Azeri languages. + + The power of regular expressions comes from the ability to include wild + cards, character classes, alternatives, and repetitions in the pattern. 
+ These are encoded in the pattern by the use of metacharacters, which do + not stand for themselves but instead are interpreted in some special + way. + + There are two different sets of metacharacters: those that are recog- + nized anywhere in the pattern except within square brackets, and those + that are recognized within square brackets. Outside square brackets, + the metacharacters are as follows: + + \ general escape character with several uses + ^ assert start of string (or line, in multiline mode) + $ assert end of string (or line, in multiline mode) + . match any character except newline (by default) + [ start character class definition + | start of alternative branch + ( start group or control verb + ) end group or control verb + * 0 or more quantifier + + 1 or more quantifier; also "possessive quantifier" + ? 0 or 1 quantifier; also quantifier minimizer + { potential start of min/max quantifier + + Brace characters { and } are also used to enclose data for construc- + tions such as \g{2} or \k{name}. In almost all uses of braces, space + and/or horizontal tab characters that follow { or precede } are allowed + and are ignored. In the case of quantifiers, they may also appear be- + fore or after the comma. The exception to this is \u{...} which is an + ECMAScript compatibility feature that is recognized only when the + PCRE2_EXTRA_ALT_BSUX option is set. ECMAScript does not ignore such + white space; it causes the item to be interpreted as literal. + + Part of a pattern that is in square brackets is called a "character + class". 
In a character class the only metacharacters are: + + \ general escape character + ^ negate the class, but only if the first character + - indicates character range + [ POSIX character class (if followed by POSIX syntax) + ] terminates the character class + + If a pattern is compiled with the PCRE2_EXTENDED option, most white + space in the pattern, other than in a character class, within a \Q...\E + sequence, or between a # outside a character class and the next new- + line, inclusive, is ignored. An escaping backslash can be used to in- + clude a white space or a # character as part of the pattern. If the + PCRE2_EXTENDED_MORE option is set, the same applies, but in addition + unescaped space and horizontal tab characters are ignored inside a + character class. Note: only these two characters are ignored, not the + full set of pattern white space characters that are ignored outside a + character class. Option settings can be changed within a pattern; see + the section entitled "Internal Option Setting" below. + + The following sections describe the use of each of the metacharacters. + + +BACKSLASH + + The backslash character has several uses. Firstly, if it is followed by + a character that is not a digit or a letter, it takes away any special + meaning that character may have. This use of backslash as an escape + character applies both inside and outside character classes. + + For example, if you want to match a * character, you must write \* in + the pattern. This escaping action applies whether or not the following + character would otherwise be interpreted as a metacharacter, so it is + always safe to precede a non-alphanumeric with backslash to specify + that it stands for itself. In particular, if you want to match a back- + slash, you write \\. + + Only ASCII digits and letters have any special meaning after a back- + slash. All other characters (in particular, those whose code points are + greater than 127) are treated as literals. 
+ + If you want to treat all characters in a sequence as literals, you can + do so by putting them between \Q and \E. Note that this includes white + space even when the PCRE2_EXTENDED option is set so that most other + white space is ignored. The behaviour is different from Perl in that $ + and @ are handled as literals in \Q...\E sequences in PCRE2, whereas in + Perl, $ and @ cause variable interpolation. Also, Perl does "double- + quotish backslash interpolation" on any backslashes between \Q and \E + which, its documentation says, "may lead to confusing results". PCRE2 + treats a backslash between \Q and \E just like any other character. + Note the following examples: + + Pattern PCRE2 matches Perl matches + + \Qabc$xyz\E abc$xyz abc followed by the + contents of $xyz + \Qabc\$xyz\E abc\$xyz abc\$xyz + \Qabc\E\$\Qxyz\E abc$xyz abc$xyz + \QA\B\E A\B A\B + \Q\\E \ \\E + + The \Q...\E sequence is recognized both inside and outside character + classes. An isolated \E that is not preceded by \Q is ignored. If \Q + is not followed by \E later in the pattern, the literal interpretation + continues to the end of the pattern (that is, \E is assumed at the + end). If the isolated \Q is inside a character class, this causes an + error, because the character class is then not terminated by a closing + square bracket. + + Another difference from Perl is that any appearance of \Q or \E inside + what might otherwise be a quantifier causes PCRE2 not to recognize the + sequence as a quantifier. Perl recognizes a quantifier if (redundantly) + either of the numbers is inside \Q...\E, but not if the separating + comma is. When not recognized as a quantifier a sequence such as + {\Q1\E,2} is treated as the literal string "{1,2}". + + Non-printing characters + + A second use of backslash provides a way of encoding non-printing char- + acters in patterns in a visible manner. 
There is no restriction on the + appearance of non-printing characters in a pattern, but when a pattern + is being prepared by text editing, it is often easier to use one of the + following escape sequences instead of the binary character it repre- + sents. In an ASCII or Unicode environment, these escapes are as fol- + lows: + + \a alarm, that is, the BEL character (hex 07) + \cx "control-x", where x is a non-control ASCII character + \e escape (hex 1B) + \f form feed (hex 0C) + \n linefeed (hex 0A) + \r carriage return (hex 0D) (but see below) + \t tab (hex 09) + \0dd character with octal code 0dd + \ddd character with octal code ddd, or back reference + \o{ddd..} character with octal code ddd.. + \xhh character with hex code hh + \x{hhh..} character with hex code hhh.. + \N{U+hhh..} character with Unicode hex code point hhh.. + + A description of how back references work is given later, following the + discussion of parenthesized groups. + + By default, after \x that is not followed by {, one or two hexadecimal + digits are read (letters can be in upper or lower case). If the charac- + ter that follows \x is neither { nor a hexadecimal digit, an error oc- + curs. This is different from Perl's default behaviour, which generates + a NUL character, but is in line with the behaviour of Perl's 'strict' + mode in re. + + Any number of hexadecimal digits may appear between \x{ and }. If a + character other than a hexadecimal digit appears between \x{ and }, or + if there is no terminating }, an error occurs. + + Characters whose code points are less than 256 can be defined by either + of the two syntaxes for \x or by an octal sequence. There is no differ- + ence in the way they are handled. For example, \xdc is exactly the same + as \x{dc} or \334. However, using the braced versions does make such + sequences easier to read. + + Support is available for some ECMAScript (aka JavaScript) escape se- + quences via two compile-time options. 
If PCRE2_ALT_BSUX is set, the se- + quence \x followed by { is not recognized. Only if \x is followed by + two hexadecimal digits is it recognized as a character escape. Other- + wise it is interpreted as a literal "x" character. In this mode, sup- + port for code points greater than 255 is provided by \u, which must be + followed by four hexadecimal digits; otherwise it is interpreted as a + literal "u" character. + + PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in ad- + dition, \u{hhh..} is recognized as the character specified by hexadeci- + mal code point. There may be any number of hexadecimal digits, but un- + like other places that also use curly brackets, spaces are not allowed + and would result in the string being interpreted as a literal. This + syntax is from ECMAScript 6. + + The \N{U+hhh..} escape sequence is recognized only when PCRE2 is oper- + ating in UTF mode. Perl also uses \N{name} to specify characters by + Unicode name; PCRE2 does not support this. Note that when \N is not + followed by an opening brace (curly bracket) it has an entirely differ- + ent meaning, matching any character that is not a newline. + + There are some legacy applications where the escape sequence \r is ex- + pected to match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option + is set, \r in a pattern is converted to \n so that it matches a LF + (linefeed) instead of a CR (carriage return) character. + + An error occurs if \c is not followed by a character whose ASCII code + point is in the range 32 to 126. The precise effect of \cx is as fol- + lows: if x is a lower case letter, it is converted to upper case. Then + bit 6 of the character (hex 40) is inverted. Thus \cA to \cZ become hex + 01 to hex 1A (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and + \c; becomes hex 7B (; is 3B). If the code unit following \c has a code + point less than 32 or greater than 126, a compile-time error occurs.
+ + For differences in the way some escapes behave in EBCDIC environments, + see section "EBCDIC environments" below. + + Octal escapes and back references + + The escape \o must be followed by a sequence of octal digits, enclosed + in braces. An error occurs if this is not the case. This escape pro- + vides a way of specifying character code points as octal numbers + greater than 0777, and it also allows octal numbers and backreferences + to be unambiguously distinguished. + + If braces are not used, after \0 up to two further octal digits are + read. However, if the PCRE2_EXTRA_NO_BS0 option is set, at least one + more octal digit must follow \0 (use \00 to generate a NUL character). + Make sure you supply two digits after the initial zero if the pattern + character that follows is itself an octal digit. + + Inside a character class, when a backslash is followed by any octal + digit, up to three octal digits are read to generate a code point. Any + subsequent digits stand for themselves. The sequences \8 and \9 are + treated as the literal characters "8" and "9". + + Outside a character class, Perl's handling of a backslash followed by a + digit other than 0 is complicated by ambiguity, and Perl has changed + over time, causing PCRE2 also to change. From PCRE2 release 10.45 there + is an option called PCRE2_EXTRA_PYTHON_OCTAL that causes PCRE2 to use + Python's unambiguous rules. The next two subsections describe the two + sets of rules. + + For greater clarity and unambiguity, it is best to avoid following \ by + a digit greater than zero. Instead, use \o{...} or \x{...} to specify + numerical character code points, and \g{...} to specify backreferences. + + Perl rules for non-class backslash 1-9 + + All the digits that follow the backslash are read as a decimal number. + If the number is less than 10, begins with the digit 8 or 9, or if + there are at least that many previous capture groups in the expression, + the entire sequence is taken as a back reference. 
Otherwise, up to + three octal digits are read to form a character code. For example: + + \040 is another way of writing an ASCII space + \40 is the same, provided there are fewer than 40 + previous capture groups + \7 is always a backreference + \11 might be a backreference, or another way of + writing a tab + \011 is always a tab + \0113 is a tab followed by the character "3" + \113 might be a backreference, otherwise the + character with octal code 113 + \377 might be a backreference, otherwise + the value 255 (decimal) + \81 is always a backreference + + Note that octal values of 100 or greater that are specified using this + syntax must not be introduced by a leading zero, because no more than + three octal digits are ever read. + + Python rules for non-class backslash 1-9 + + If there are at least three octal digits after the backslash, exactly + three are read as an octal code point number, but the value must be no + greater than \377, even in modes where higher code point values are + supported. Any subsequent digits stand for themselves. If there are + fewer than three octal digits, the sequence is taken as a decimal back + reference. Thus, for example, \12 is always a back reference, indepen- + dent of how many captures there are in the pattern. An error is gener- + ated for a reference to a non-existent capturing group. + + Constraints on character values + + Characters that are specified using octal or hexadecimal numbers are + limited to certain values, as follows: + + 8-bit non-UTF mode no greater than 0xff + 16-bit non-UTF mode no greater than 0xffff + 32-bit non-UTF mode no greater than 0xffffffff + All UTF modes no greater than 0x10ffff and a valid code point + + Invalid Unicode code points are all those in the range 0xd800 to 0xdfff + (the so-called "surrogate" code points). The check for these can be + disabled by the caller of pcre2_compile() by setting the option + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
However, this is possible only in + UTF-8 and UTF-32 modes, because these values are not representable in + UTF-16. + + Escape sequences in character classes + + All the sequences that define a single character value can be used both + inside and outside character classes. In addition, inside a character + class, \b is interpreted as the backspace character (hex 08). + + When not followed by an opening brace, \N is not allowed in a character + class. \B, \R, and \X are not special inside a character class. Like + other unrecognized alphabetic escape sequences, they cause an error. + Outside a character class, these sequences have different meanings. + + Unsupported escape sequences + + In Perl, the sequences \F, \l, \L, \u, and \U are recognized by its + string handler and used to modify the case of following characters. By + default, PCRE2 does not support these escape sequences in patterns. + However, if either of the PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX op- + tions is set, \U matches a "U" character, and \u can be used to define + a character by code point, as described above. + + Absolute and relative backreferences + + The sequence \g followed by a signed or unsigned number, optionally en- + closed in braces, is an absolute or relative backreference. A named + backreference can be coded as \g{name}. Backreferences are discussed + later, following the discussion of parenthesized groups. + + Absolute and relative subroutine calls + + For compatibility with Oniguruma, the non-Perl syntax \g followed by a + name or a number enclosed either in angle brackets or single quotes, is + an alternative syntax for referencing a capture group as a subroutine. + Details are discussed later. Note that \g{...} (Perl syntax) and + \g<...> (Oniguruma syntax) are not synonymous. The former is a backref- + erence; the latter is a subroutine call. 
+ + Generic character types + + Another use of backslash is for specifying generic character types: + + \d any decimal digit + \D any character that is not a decimal digit + \h any horizontal white space character + \H any character that is not a horizontal white space character + \N any character that is not a newline + \s any white space character + \S any character that is not a white space character + \v any vertical white space character + \V any character that is not a vertical white space character + \w any "word" character + \W any "non-word" character + + The \N escape sequence has the same meaning as the "." metacharacter + when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change + the meaning of \N. Note that when \N is followed by an opening brace it + has a different meaning. See the section entitled "Non-printing charac- + ters" above for details. Perl also uses \N{name} to specify characters + by Unicode name; PCRE2 does not support this. + + Each pair of lower and upper case escape sequences partitions the com- + plete set of characters into two disjoint sets. Any given character + matches one, and only one, of each pair. The sequences can appear both + inside and outside character classes. They each match one character of + the appropriate type. If the current matching point is at the end of + the subject string, all of them fail, because there is no character to + match. + + The default \s characters are HT (9), LF (10), VT (11), FF (12), CR + (13), and space (32), which are defined as white space in the "C" lo- + cale. This list may vary if locale-specific matching is taking place. + For example, in some locales the "non-breaking space" character (\xA0) + is recognized as white space, and in others the VT character is not. + + A "word" character is an underscore or any character that is a letter + or digit. 
By default, the definition of letters and digits is con- + trolled by PCRE2's low-valued character tables, and may vary if locale- + specific matching is taking place (see "Locale support" in the pcre2api + page). For example, in a French locale such as "fr_FR" in Unix-like + systems, or "french" in Windows, some character codes greater than 127 + are used for accented letters, and these are then matched by \w. The + use of locales with Unicode is discouraged. + + By default, characters whose code points are greater than 127 never + match \d, \s, or \w, and always match \D, \S, and \W, although this may + be different for characters in the range 128-255 when locale-specific + matching is happening. These escape sequences retain their original + meanings from before Unicode support was available, mainly for effi- + ciency reasons. If the PCRE2_UCP option is set, the behaviour is + changed so that Unicode properties are used to determine character + types, as follows: + + \d any character that matches \p{Nd} (decimal digit) + \s any character that matches \p{Z} or \h or \v + \w any character that matches \p{L}, \p{N}, \p{Mn}, or \p{Pc} + + The addition of \p{Mn} (non-spacing mark) and the replacement of an ex- + plicit test for underscore with a test for \p{Pc} (connector punctua- + tion) happened in PCRE2 release 10.43. This brings PCRE2 into line with + Perl. + + The upper case escapes match the inverse sets of characters. Note that + \d matches only decimal digits, whereas \w matches any Unicode digit, + as well as other character categories. Note also that PCRE2_UCP affects + \b and \B, because they are defined in terms of \w and \W. Matching + these sequences is noticeably slower when PCRE2_UCP is set. + + The effect of PCRE2_UCP on any one of these escape sequences can be + negated by the options PCRE2_EXTRA_ASCII_BSD, PCRE2_EXTRA_ASCII_BSS, + and PCRE2_EXTRA_ASCII_BSW, respectively.
These options can be set and + reset within a pattern by means of an internal option setting (see be- + low). + + The sequences \h, \H, \v, and \V, in contrast to the other sequences, + which match only ASCII characters by default, always match a specific + list of code points, whether or not PCRE2_UCP is set. The horizontal + space characters are: + + U+0009 Horizontal tab (HT) + U+0020 Space + U+00A0 Non-break space + U+1680 Ogham space mark + U+180E Mongolian vowel separator + U+2000 En quad + U+2001 Em quad + U+2002 En space + U+2003 Em space + U+2004 Three-per-em space + U+2005 Four-per-em space + U+2006 Six-per-em space + U+2007 Figure space + U+2008 Punctuation space + U+2009 Thin space + U+200A Hair space + U+202F Narrow no-break space + U+205F Medium mathematical space + U+3000 Ideographic space + + The vertical space characters are: + + U+000A Linefeed (LF) + U+000B Vertical tab (VT) + U+000C Form feed (FF) + U+000D Carriage return (CR) + U+0085 Next line (NEL) + U+2028 Line separator + U+2029 Paragraph separator + + In 8-bit, non-UTF-8 mode, only the characters with code points less + than 256 are relevant. + + Newline sequences + + Outside a character class, by default, the escape sequence \R matches + any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent + to the following: + + (?>\r\n|\n|\x0b|\f|\r|\x85) + + This is an example of an "atomic group", details of which are given be- + low. This particular group matches either the two-character sequence + CR followed by LF, or one of the single characters LF (linefeed, + U+000A), VT (vertical tab, U+000B), FF (form feed, U+000C), CR (car- + riage return, U+000D), or NEL (next line, U+0085). Because this is an + atomic group, the two-character sequence is treated as a single unit + that cannot be split. + + In other modes, two additional characters whose code points are greater + than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa- + rator, U+2029). 
Unicode support is not needed for these characters to + be recognized. + + It is possible to restrict \R to match only CR, LF, or CRLF (instead of + the complete set of Unicode line endings) by setting the option + PCRE2_BSR_ANYCRLF at compile time. (BSR is an abbreviation for "back- + slash R".) This can be made the default when PCRE2 is built; if this is + the case, the other behaviour can be requested via the PCRE2_BSR_UNI- + CODE option. It is also possible to specify these settings by starting + a pattern string with one of the following sequences: + + (*BSR_ANYCRLF) CR, LF, or CRLF only + (*BSR_UNICODE) any Unicode newline sequence + + These override the default and the options given to the compiling func- + tion. Note that these special settings, which are not Perl-compatible, + are recognized only at the very start of a pattern, and that they must + be in upper case. If more than one of them is present, the last one is + used. They can be combined with a change of newline convention; for ex- + ample, a pattern can start with: + + (*ANY)(*BSR_ANYCRLF) + + They can also be combined with the (*UTF) or (*UCP) special sequences. + Inside a character class, \R is treated as an unrecognized escape se- + quence, and causes an error. + + Unicode character properties + + When PCRE2 is built with Unicode support (the default), three addi- + tional escape sequences that match characters with specific properties + are available. They can be used in any mode, though in 8-bit and 16-bit + non-UTF modes these sequences are of course limited to testing charac- + ters whose code points are less than U+0100 or U+10000, respectively. + In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode + limit) may be encountered. These are all treated as being in the Un- + known script and with an unassigned type. + + Matching characters by Unicode property is not fast, because PCRE2 has + to do a multistage table lookup in order to find a character's prop- + erty. 
That is why the traditional escape sequences such as \d and \w do + not use Unicode properties in PCRE2 by default, though you can make + them do so by setting the PCRE2_UCP option or by starting the pattern + with (*UCP). + + The extra escape sequences that provide property support are: + + \p{xx} a character with the xx property + \P{xx} a character without the xx property + \X a Unicode extended grapheme cluster + + For compatibility with Perl, negation can be specified by including a + circumflex between the opening brace and the property. For example, + \p{^Lu} is the same as \P{Lu}. + + In accordance with Unicode's "loose matching" rules, ASCII white space + characters, hyphens, and underscores are ignored in the properties rep- + resented by xx above. As well as the space character, ASCII white space + can be tab, linefeed, vertical tab, formfeed, or carriage return. + + Some properties are specified as a name only; others as a name and a + value, separated by a colon or an equals sign. The names and values + consist of ASCII letters and digits (with one Perl-specific exception, + see below). They are not case sensitive. Note, however, that the es- + capes themselves, \p and \P, are case sensitive. There are abbrevia- + tions for many names. The following examples are all equivalent: + + \p{bidiclass=al} + \p{BC=al} + \p{ Bidi_Class : AL } + \p{ Bi-di class = Al } + \P{ ^ Bi-di class = Al } + + There is support for Unicode script names, Unicode general category + properties, "Any", which matches any character (including newline), + Bidi_Class, a number of binary (yes/no) properties, and some special + PCRE2 properties (described below). Certain other Perl properties such + as "InMusicalSymbols" are not supported by PCRE2. Note that \P{Any} + does not match any characters, so always causes a match failure. + + Script properties for \p and \P + + There are three different syntax forms for matching a script. 
Each Uni- + code character has a basic script and, optionally, a list of other + scripts ("Script Extensions") with which it is commonly used. Using the + Adlam script as an example, \p{sc:Adlam} matches characters whose basic + script is Adlam, whereas \p{scx:Adlam} matches, in addition, characters + that have Adlam in their extensions list. The full names "script" and + "script extensions" for the property types are recognized and, as for + all property specifications, an equals sign is an alternative to the + colon. If a script name is given without a property type, for example, + \p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this inter- + pretation at release 5.26 and PCRE2 changed at release 10.40. + + Unassigned characters (and in non-UTF 32-bit mode, characters with code + points greater than 0x10FFFF) are assigned the "Unknown" script. Others + that are not part of an identified script are lumped together as "Com- + mon". The current list of recognized script names and their 4-character + abbreviations can be obtained by running this command: + + pcre2test -LS + + + The general category property for \p and \P + + Each character has exactly one Unicode general category property, spec- + ified by a two-letter abbreviation. If only one letter is specified + with \p or \P, it includes all the general category properties that + start with that letter. 
In this case, in the absence of negation, the + curly brackets in the escape sequence are optional; these two examples + have the same effect: + + \p{L} + \pL + + The following general category property codes are supported: + + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate + + L Letter + Lc Cased letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark + + N Number + Nd Decimal number + Nl Letter number + No Other number + + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation + + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol + + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator + + Perl originally used the name L& for the Lc property. This is still + supported by Perl, but discouraged. PCRE2 also still supports it. This + property matches any character that has the Lu, Ll, or Lt property, in + other words, any letter that is not classified as a modifier or + "other". From release 10.45 of PCRE2 the properties Lu, Ll, and Lt are + all treated as Lc when case-independent matching is set by the + PCRE2_CASELESS option or (?i) within the pattern. The other properties + are not affected by caseless matching. + + The Cs (Surrogate) property applies only to characters whose code + points are in the range U+D800 to U+DFFF. These characters are no dif- + ferent to any other character when PCRE2 is not in UTF mode (using the + 16-bit or 32-bit library). However, they are not valid in Unicode + strings and so cannot be tested by PCRE2 in UTF mode, unless UTF valid- + ity checking has been turned off (see the discussion of + PCRE2_NO_UTF_CHECK in the pcre2api page). 
+ + The long synonyms for property names that Perl supports (such as + \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix + any of these properties with "Is". + + No character that is in the Unicode table has the Cn (unassigned) prop- + erty. Instead, this property is assumed for any code point that is not + in the Unicode table. + + Binary (yes/no) properties for \p and \P + + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by + running this command: + + pcre2test -LP + + + The Bidi_Class property for \p and \P + + \p{Bidi_Class:} matches a character with the given class + \p{BC:} matches a character with the given class + + The recognized classes are: + + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space + + As in all property specifications, an equals sign may be used instead + of a colon and the class names are case-insensitive. Only the short + names listed above are recognized; PCRE2 does not at present support + any long alternatives. + + Extended grapheme clusters + + The \X escape matches any number of Unicode characters that form an + "extended grapheme cluster", and treats the sequence as an atomic group + (see below). 
Unicode supports various kinds of composite character by + giving each character a grapheme breaking property, and having rules + that use these properties to define the boundaries of extended grapheme + clusters. The rules are defined in Unicode Standard Annex 29, "Unicode + Text Segmentation". Unicode 11.0.0 abandoned the use of some previous + properties that had been used for emojis. Instead it introduced vari- + ous emoji-specific properties. PCRE2 uses only the Extended Picto- + graphic property. + + \X always matches at least one character. Then it decides whether to + add additional characters according to the following rules for ending a + cluster: + + 1. End at the end of the subject string. + + 2. Do not end between CR and LF; otherwise end after any control char- + acter. + + 3. Do not break Hangul (a Korean script) syllable sequences. Hangul + characters are of five types: L, V, T, LV, and LVT. An L character may + be followed by an L, V, LV, or LVT character; an LV or V character may + be followed by a V or T character; an LVT or T character may be fol- + lowed only by a T character. + + 4. Do not end before extending characters or spacing marks or the zero- + width joiner (ZWJ) character. Characters with the "mark" property al- + ways have the "extend" grapheme breaking property. + + 5. Do not end after prepend characters. + + 6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width + joiner) sequences. An emoji ZWJ sequence consists of a character with + the Extended_Pictographic property, optionally followed by one or more + characters with the Extend property, followed by the ZWJ character, + followed by another Extended_Pictographic character. + + 7. Do not break within emoji flag sequences. That is, do not break be- + tween regional indicator (RI) characters if there are an odd number of + RI characters before the break point. + + 8. Otherwise, end the cluster. 
+ + PCRE2's additional properties + + As well as the standard Unicode properties described above, PCRE2 sup- + ports four more that make it possible to convert traditional escape se- + quences such as \w and \s to use Unicode properties. PCRE2 uses these + non-standard, non-Perl properties internally when PCRE2_UCP is set. + However, they may also be used explicitly. These properties are: + + Xan Any alphanumeric character + Xps Any POSIX space character + Xsp Any Perl space character + Xwd Any Perl "word" character + + Xan matches characters that have either the L (letter) or the N (num- + ber) property. Xps matches the characters tab, linefeed, vertical tab, + form feed, or carriage return, and any other character that has the Z + (separator) property (this includes the space character). Xsp is the + same as Xps; in PCRE1 it used to exclude vertical tab, for Perl compat- + ibility, but Perl changed. Xwd matches the same characters as Xan, plus + those that match Mn (non-spacing mark) or Pc (connector punctuation, + which includes underscore). + + There is another non-standard property, Xuc, which matches any charac- + ter that can be represented by a Universal Character Name in C++ and + other programming languages. These are the characters $, @, ` (grave + accent), and all characters with Unicode code points greater than or + equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that + most base (ASCII) characters are excluded. (Universal Character Names + are of the form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit. + Note that the Xuc property does not match these sequences but the char- + acters that they represent.) + + Resetting the match start + + In normal use, the escape sequence \K causes any previously matched + characters not to be included in the final matched sequence that is re- + turned. For example, the pattern: + + foo\Kbar + + matches "foobar", but reports that it has matched "bar". 
\K does not + interact with anchoring in any way. The pattern: + + ^foo\Kbar + + matches only when the subject begins with "foobar" (in single line + mode), though it again reports the matched string as "bar". This fea- + ture is similar to a lookbehind assertion (described below), but the + part of the pattern that precedes \K is not constrained to match a lim- + ited number of characters, as is required for a lookbehind assertion. + The use of \K does not interfere with the setting of captured sub- + strings. For example, when the pattern + + (foo)\Kbar + + matches "foobar", the first substring is still set to "foo". + + From version 5.32.0 Perl forbids the use of \K in lookaround asser- + tions. From release 10.38 PCRE2 also forbids this by default. However, + the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling + pcre2_compile() to re-enable the previous behaviour. When this option + is set, \K is acted upon when it occurs inside positive assertions, but + is ignored in negative assertions. Note that when a pattern such as + (?=ab\K) matches, the reported start of the match can be greater than + the end of the match. Using \K in a lookbehind assertion at the start + of a pattern can also lead to odd effects. For example, consider this + pattern: + + (?<=\Kfoo)bar + + If the subject is "foobar", a call to pcre2_match() with a starting + offset of 3 succeeds and reports the matching string as "foobar", that + is, the start of the reported match is earlier than where the match + started. + + Simple assertions + + The final use of backslash is for certain simple assertions. An asser- + tion specifies a condition that has to be met at a particular point in + a match, without consuming any characters from the subject string. The + use of groups for more complicated assertions is described below. 
The + backslashed assertions are: + + \b matches at a word boundary + \B matches when not at a word boundary + \A matches at the start of the subject + \Z matches at the end of the subject + also matches before a newline at the end of the subject + \z matches only at the end of the subject + \G matches at the first matching position in the subject + + Inside a character class, \b has a different meaning; it matches the + backspace character. If any other of these assertions appears in a + character class, an "invalid escape sequence" error is generated. + + A word boundary is a position in the subject string where the current + character and the previous character do not both match \w or \W (i.e. + one matches \w and the other matches \W), or the start or end of the + string if the first or last character matches \w, respectively. When + PCRE2 is built with Unicode support, the meanings of \w and \W can be + changed by setting the PCRE2_UCP option. When this is done, it also af- + fects \b and \B. Neither PCRE2 nor Perl has a separate "start of word" + or "end of word" metasequence. However, whatever follows \b normally + determines which it is. For example, the fragment \ba matches "a" at + the start of a word. + + The \A, \Z, and \z assertions differ from the traditional circumflex + and dollar (described in the next section) in that they only ever match + at the very start and end of the subject string, whatever options are + set. Thus, they are independent of multiline mode. These three asser- + tions are not affected by the PCRE2_NOTBOL or PCRE2_NOTEOL options, + which affect only the behaviour of the circumflex and dollar metachar- + acters. However, if the startoffset argument of pcre2_match() is non- + zero, indicating that matching is to start at a point other than the + beginning of the subject, \A can never match. 
The difference between + \Z and \z is that \Z matches before a newline at the end of the string + as well as at the very end, whereas \z matches only at the end. + + The \G assertion is true only when the current matching position is at + the start point of the matching process, as specified by the startoff- + set argument of pcre2_match(). It differs from \A when the value of + startoffset is non-zero. By calling pcre2_match() multiple times with + appropriate arguments, you can mimic Perl's /g option, and it is in + this kind of implementation where \G can be useful. + + Note, however, that PCRE2's implementation of \G, being true at the + starting character of the matching process, is subtly different from + Perl's, which defines it as true at the end of the previous match. In + Perl, these can be different when the previously matched string was + empty. Because PCRE2 does just one match at a time, it cannot reproduce + this behaviour. + + If all the alternatives of a pattern begin with \G, the expression is + anchored to the starting match position, and the "anchored" flag is set + in the compiled regular expression. + + +CIRCUMFLEX AND DOLLAR + + The circumflex and dollar metacharacters are zero-width assertions. + That is, they test for a particular condition being true without con- + suming any characters from the subject string. These two metacharacters + are concerned with matching the starts and ends of lines. If the new- + line convention is set so that only the two-character sequence CRLF is + recognized as a newline, isolated CR and LF characters are treated as + ordinary data characters, and are not recognized as newlines. + + Outside a character class, in the default matching mode, the circumflex + character is an assertion that is true only if the current matching + point is at the start of the subject string. 
If the startoffset argu- + ment of pcre2_match() is non-zero, or if PCRE2_NOTBOL is set, circum- + flex can never match if the PCRE2_MULTILINE option is unset. Inside a + character class, circumflex has an entirely different meaning (see be- + low). + + Circumflex need not be the first character of the pattern if a number + of alternatives are involved, but it should be the first thing in each + alternative in which it appears if the pattern is ever to match that + branch. If all possible alternatives start with a circumflex, that is, + if the pattern is constrained to match only at the start of the sub- + ject, it is said to be an "anchored" pattern. (There are also other + constructs that can cause a pattern to be anchored.) + + The dollar character is an assertion that is true only if the current + matching point is at the end of the subject string, or immediately be- + fore a newline at the end of the string (by default), unless PCRE2_NO- + TEOL is set. Note, however, that it does not actually match the new- + line. Dollar need not be the last character of the pattern if a number + of alternatives are involved, but it should be the last item in any + branch in which it appears. Dollar has no special meaning in a charac- + ter class. + + The meaning of dollar can be changed so that it matches only at the + very end of the string, by setting the PCRE2_DOLLAR_ENDONLY option at + compile time. This does not affect the \Z assertion. + + The meanings of the circumflex and dollar metacharacters are changed if + the PCRE2_MULTILINE option is set. When this is the case, a dollar + character matches before any newlines in the string, as well as at the + very end, and a circumflex matches immediately after internal newlines + as well as at the start of the subject string. It does not match after + a newline that ends the string, for compatibility with Perl. However, + this can be changed by setting the PCRE2_ALT_CIRCUMFLEX option. 
+ + For example, the pattern /^abc$/ matches the subject string "def\nabc" + (where \n represents a newline) in multiline mode, but not otherwise. + Consequently, patterns that are anchored in single line mode because + all branches start with ^ are not anchored in multiline mode, and a + match for circumflex is possible when the startoffset argument of + pcre2_match() is non-zero. The PCRE2_DOLLAR_ENDONLY option is ignored + if PCRE2_MULTILINE is set. + + When the newline convention (see "Newline conventions" above) recog- + nizes the two-character sequence CRLF as a newline, this is preferred, + even if the single characters CR and LF are also recognized as new- + lines. For example, if the newline convention is "any", a multiline + mode circumflex matches before "xyz" in the string "abc\r\nxyz" rather + than after CR, even though CR on its own is a valid newline. (It also + matches at the very start of the string, of course.) + + Note that the sequences \A, \Z, and \z can be used to match the start + and end of the subject in both modes, and if all branches of a pattern + start with \A it is always anchored, whether or not PCRE2_MULTILINE is + set. + + +FULL STOP (PERIOD, DOT) AND \N + + Outside a character class, a dot in the pattern matches any one charac- + ter in the subject string except (by default) a character that signi- + fies the end of a line. One or more characters may be specified as line + terminators (see "Newline conventions" above). + + Dot never matches a single line-ending character. When the two-charac- + ter sequence CRLF is the only line ending, dot does not match CR if it + is immediately followed by LF, but otherwise it matches all characters + (including isolated CRs and LFs). When ANYCRLF is selected for line + endings, no occurrences of CR or LF match dot. When all Unicode line + endings are being recognized, dot does not match CR or LF or any of the + other line ending characters. 
+ + The behaviour of dot with regard to newlines can be changed. If the + PCRE2_DOTALL option is set, a dot matches any one character, without + exception. If the two-character sequence CRLF is present in the sub- + ject string, it takes two dots to match it. + + The handling of dot is entirely independent of the handling of circum- + flex and dollar, the only relationship being that they both involve + newlines. Dot has no special meaning in a character class. + + The escape sequence \N when not followed by an opening brace behaves + like a dot, except that it is not affected by the PCRE2_DOTALL option. + In other words, it matches any character except one that signifies the + end of a line. + + When \N is followed by an opening brace it has a different meaning. See + the section entitled "Non-printing characters" above for details. Perl + also uses \N{name} to specify characters by Unicode name; PCRE2 does + not support this. + + +MATCHING A SINGLE CODE UNIT + + Outside a character class, the escape sequence \C matches any one code + unit, whether or not a UTF mode is set. In the 8-bit library, one code + unit is one byte; in the 16-bit library it is a 16-bit unit; in the + 32-bit library it is a 32-bit unit. Unlike a dot, \C always matches + line-ending characters. The feature is provided in Perl in order to + match individual bytes in UTF-8 mode, but it is unclear how it can use- + fully be used. + + Because \C breaks up characters into individual code units, matching + one unit with \C in UTF-8 or UTF-16 mode means that the rest of the + string may start with a malformed UTF character. This has undefined re- + sults, because PCRE2 assumes that it is matching character by character + in a valid UTF string (by default it checks the subject string's valid- + ity at the start of processing unless the PCRE2_NO_UTF_CHECK or + PCRE2_MATCH_INVALID_UTF option is used). 
+ + An application can lock out the use of \C by setting the + PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also + possible to build PCRE2 with the use of \C permanently disabled. + + PCRE2 does not allow \C to appear in lookbehind assertions (described + below) in UTF-8 or UTF-16 modes, because this would make it impossible + to calculate the length of the lookbehind. Neither the alternative + matching function pcre2_dfa_match() nor the JIT optimizer supports \C in + these UTF modes. The former gives a match-time error; the latter fails + to optimize and so the match is always run using the interpreter. + + In the 32-bit library, however, \C is always supported (when not ex- + plicitly locked out) because it always matches a single code unit, + whether or not UTF-32 is specified. + + In general, the \C escape sequence is best avoided. However, one way of + using it that avoids the problem of malformed UTF-8 or UTF-16 charac- + ters is to use a lookahead to check the length of the next character, + as in this pattern, which could be used with a UTF-8 string (ignore + white space and line breaks): + + (?| (?=[\x00-\x7f])(\C) | + (?=[\x80-\x{7ff}])(\C)(\C) | + (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) | + (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C)) + + In this example, a group that starts with (?| resets the capturing + parentheses numbers in each alternative (see "Duplicate Group Numbers" + below). The assertions at the start of each branch check the next UTF-8 + character for values whose encoding uses 1, 2, 3, or 4 bytes, respec- + tively. The character's individual bytes are then captured by the ap- + propriate number of \C groups. + + +SQUARE BRACKETS AND CHARACTER CLASSES + + An opening square bracket introduces a character class, terminated by a + closing square bracket. A closing square bracket on its own is not spe- + cial by default. 
If a closing square bracket is required as a member + of the class, it should be the first data character in the class (after + an initial circumflex, if present) or escaped with a backslash. This + means that, by default, an empty class cannot be defined. However, if + the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at + the start does end the (empty) class. + + A character class matches a single character in the subject. A matched + character must be in the set of characters defined by the class, unless + the first character in the class definition is a circumflex, in which + case the subject character must not be in the set defined by the class. + If a circumflex is actually required as a member of the class, ensure + it is not the first character, or escape it with a backslash. + + For example, the character class [aeiou] matches any lower case English + vowel, whereas [^aeiou] matches all other characters. Note that a cir- + cumflex is just a convenient notation for specifying the characters + that are in the class by enumerating those that are not. A class that + starts with a circumflex is not an assertion; it still consumes a char- + acter from the subject string, and therefore it fails to match if the + current pointer is at the end of the string. + + Characters in a class may be specified by their code points using \o, + \x, or \N{U+hh..} in the usual way. When caseless matching is set, any + letters in a class represent both their upper case and lower case ver- + sions, so for example, a caseless [aeiou] matches "A" as well as "a", + and a caseless [^aeiou] does not match "A", whereas a caseful version + would. Note that there are two ASCII characters, K and S, that, in ad- + dition to their lower case ASCII equivalents, are case-equivalent with + Unicode U+212A (Kelvin sign) and U+017F (long S) respectively when ei- + ther PCRE2_UTF or PCRE2_UCP is set. 
If you do not want these ASCII/non- + ASCII case equivalences, you can suppress them by setting PCRE2_EX- + TRA_CASELESS_RESTRICT, either as an option in a compile context, or by + including (*CASELESS_RESTRICT) or (?r) within a pattern. + + Characters that might indicate line breaks are never treated in any + special way when matching character classes, whatever line-ending se- + quence is in use, and whatever setting of the PCRE2_DOTALL and + PCRE2_MULTILINE options is used. A class such as [^a] always matches + one of these characters. + + The generic character type escape sequences \d, \D, \h, \H, \p, \P, \s, + \S, \v, \V, \w, and \W may appear in a character class, and add the + characters that they match to the class. For example, [\dABCDEF] + matches any hexadecimal digit. In UTF modes, the PCRE2_UCP option af- + fects the meanings of \d, \s, \w and their upper case partners, just as + it does when they appear outside a character class, as described in the + section entitled "Generic character types" above. The escape sequence + \b has a different meaning inside a character class; it matches the + backspace character. The sequences \B, \R, and \X are not special in- + side a character class. Like any other unrecognized escape sequences, + they cause an error. The same is true for \N when not followed by an + opening brace. + + The minus (hyphen) character can be used to specify a range of charac- + ters in a character class. For example, [d-m] matches any letter be- + tween d and m, inclusive. If a minus character is required in a class, + it must be escaped with a backslash or appear in a position where it + cannot be interpreted as indicating a range, typically as the first or + last character in the class, or immediately after a range. For example, + [b-d-z] matches letters in the range b to d, a hyphen character, or z. + + There is some special treatment for alphabetic ranges in EBCDIC envi- + ronments; see the section "EBCDIC environments" below. 
+ + Perl treats a hyphen as a literal if it appears before or after a POSIX + class (see below) or before or after a character type escape such as \d + or \H. However, unless the hyphen is the last character in the class, + Perl outputs a warning in its warning mode, as this is most likely a + user error. As PCRE2 has no facility for warning, an error is given in + these cases. + + It is not possible to have the literal character "]" as the end charac- + ter of a range. A pattern such as [W-]46] is interpreted as a class of + two characters ("W" and "-") followed by a literal string "46]", so it + would match "W46]" or "-46]". However, if the "]" is escaped with a + backslash it is interpreted as the end of a range, so [W-\]46] is in- + terpreted as a class containing a range and two other characters. The + octal or hexadecimal representation of "]" can also be used to end a + range. + + Ranges normally include all code points between the start and end char- + acters, inclusive. They can also be used for code points specified nu- + merically, for example [\000-\037]. Ranges can include any characters + that are valid for the current mode. In any UTF mode, the so-called + "surrogate" characters (those whose code points lie between 0xd800 and + 0xdfff inclusive) may not be specified explicitly by default (the + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables this check). How- + ever, ranges such as [\x{d7ff}-\x{e000}], which include the surrogates, + are always permitted. + + If a range that includes letters is used when caseless matching is set, + it matches the letters in either case. For example, [W-c] is equivalent + to [][\\^_`wxyzabc], matched caselessly, and in a non-UTF mode, if + character tables for a French locale are in use, [\xc8-\xcb] matches + accented E characters in both cases. + + A circumflex can conveniently be used with the upper case character + types to specify a more restricted set of characters than the matching + lower case type. 
For example, the class [^\W_] matches any letter or + digit, but not underscore, whereas [\w] includes underscore. A positive + character class should be read as "something OR something OR ..." and a + negative class as "NOT something AND NOT something AND NOT ...". + + The metacharacters that are recognized in character classes are back- + slash, hyphen (when it can be interpreted as specifying a range), cir- + cumflex (only at the start), and the terminating closing square + bracket. An opening square bracket is also special when it can be in- + terpreted as introducing a POSIX class (see "POSIX character classes" + below), or a special compatibility feature (see "Compatibility feature + for word boundaries" below). Escaping any non-alphanumeric character in + a class turns it into a literal, whether or not it would otherwise be a + metacharacter. + + +PERL EXTENDED CHARACTER CLASSES + + From release 10.45 PCRE2 supports Perl's (?[...]) extended character + class syntax. This can be used to perform set operations such as inter- + section on character classes. + + The syntax permitted within (?[...]) is quite different to ordinary + character classes. Inside the extended class, there is an expression + syntax consisting of "atoms", operators, and ordinary parentheses "()" + used for grouping. Such classes always have the Perl /xx modifier + (PCRE2 option PCRE2_EXTENDED_MORE) turned on within them. This means + that literal space and tab characters are ignored everywhere in the + class. + + The allowed atoms are individual characters specified by escape se- + quences such as \n or \x{123}, character types such as \d, POSIX + classes such as [:alpha:], and nested ordinary (non-extended) character + classes. For example, in (?[\d & [...]]) the nested class [...] follows + the usual rules for ordinary character classes, in which parentheses + are not metacharacters, and character literals and ranges are permit- + ted. 
+ + Character literals and ranges may not appear outside a nested ordinary + character class because they are not atoms in the extended syntax. The + extended syntax does not introduce any additional escape sequences, so + (?[\y]) is an unknown escape, as it would be in [\y]. + + In the extended syntax, ^ does not negate a class (except within an or- + dinary class nested inside an extended class); it is instead a binary + operator. + + The binary operators are "&" (intersection), "|" or "+" (union), "-" + (subtraction) and "^" (symmetric difference). These are left-associa- + tive and "&" has higher (tighter) precedence, while the others have + equal lower precedence. The one prefix unary operator is "!" (comple- + ment), with highest precedence. + + +UTS#18 EXTENDED CHARACTER CLASSES + + The PCRE2_ALT_EXTENDED_CLASS option enables an alternative to Perl's + (?[...]) syntax, allowing instead extended class behaviour inside or- + dinary [...] character classes. This altered syntax for [...] classes + is loosely described by the Unicode standard UTS#18. The PCRE2_ALT_EX- + TENDED_CLASS option does not prevent use of (?[...]) classes; it just + changes the meaning of all [...] classes that are not nested inside a + Perl (?[...]) class. + + Firstly, in ordinary Perl [...] syntax, an expression such as "[a[]" is + a character class with two literal characters "a" and "[", but in + UTS#18 extended classes the "[" character becomes an additional + metacharacter within classes, denoting the start of a nested class, so + a literal "[" must be escaped as "\[". + + Secondly, within the UTS#18 extended syntax, there are operators "||", + "&&", "--" and "~~" which denote character class union, intersection, + subtraction, and symmetric difference respectively. In standard Perl + syntax, these would simply be needlessly-repeated literals (except for + "--" which could be the start or end of a range). 
In UTS#18 extended + classes these operators can be used in constructs such as [\p{L}--[QW]] + for "Unicode letters, other than Q and W". A literal "-" at the start + or end of a range must be escaped, so while "[--1]" in Perl syntax is + the range from hyphen to "1", it must be escaped as "[\--1]" in UTS#18 + extended classes. + + Unlike Perl's (?[...]) extended classes, the PCRE2_EXTENDED_MORE option + to ignore space and tab characters is not automatically enabled for + UTS#18 extended classes, but it is honoured if set. + + Extended UTS#18 classes can be nested, and nested classes are them- + selves extended classes (unlike Perl, where nested classes must be sim- + ple classes). For example, [\p{L}&&[\p{Thai}||\p{Greek}]] matches any + letter that is in the Thai or Greek scripts. Note that this means that + no special grouping characters (such as the parentheses used in Perl's + (?[...]) class syntax) are needed. + + Individual class items (literal characters, literal ranges, properties + such as \d or \p{...}, and nested classes) can be combined by juxtapo- + sition or by an operator. Juxtaposition is the implicit union operator, + and binds more tightly than any explicit operator. Thus a sequence of + literals and/or ranges behaves as if it is enclosed in square brackets. + For example, [A-Z0-9&&[^E8]] is the same as [[A-Z0-9]&&[^E8]], which + matches any upper case alphanumeric character except "E" or "8". + + Precedence between the explicit operators is not defined, so mixing op- + erators is a syntax error. For example, [A&&B--C] is an error, but + [A&&[B--C]] is valid. + + This is an emerging syntax which is being adopted gradually across the + regex ecosystem: for example JavaScript adopted the "/v" flag in EC- + MAScript 2024; Python's "re" module reserves the syntax for future use + with a FutureWarning for unescaped use of "[" as a literal within char- + acter classes. 
Due to UTS#18 providing insufficient guidance, engines + interpret the syntax differently. Rust's "regex" crate and Python's + "regex" PyPI module both implement UTS#18 extended classes, but with + slight incompatibilities ([A||B&&C] is parsed as [A||[B&&C]] in + Python's "regex" but as [[A||B]&&C] in Rust's "regex"). + + PCRE2's syntax adds syntax restrictions similar to ECMAScript's /v + flag, so that all the UTS#18 extended classes accepted as valid by + PCRE2 have the property that they are interpreted either with the same + behaviour, or as invalid, by all other major engines. Please file an + issue if you are aware of cross-engine differences in behaviour between + PCRE2 and another major engine. + + +POSIX CHARACTER CLASSES + + Perl supports the POSIX notation for character classes. This uses names + enclosed by [: and :] within the enclosing square brackets. PCRE2 also + supports this notation, in both ordinary and extended classes. For ex- + ample, + + [01[:alpha:]%] + + matches "0", "1", any alphabetic character, or "%". The supported class + names are: + + alnum letters and digits + alpha letters + ascii character codes 0 - 127 + blank space or tab only + cntrl control characters + digit decimal digits (same as \d) + graph printing characters, excluding space + lower lower case letters + print printing characters, including space + punct printing characters, excluding letters and digits and space + space white space (the same as \s from PCRE2 8.34) + upper upper case letters + word "word" characters (same as \w) + xdigit hexadecimal digits + + The default "space" characters are HT (9), LF (10), VT (11), FF (12), + CR (13), and space (32). If locale-specific matching is taking place, + the list of space characters may be different; there may be fewer or + more of them. "Space" and \s match the same set of characters, as do + "word" and \w. + + The name "word" is a Perl extension, and "blank" is a GNU extension + from Perl 5.8. 
Another Perl extension is negation, which is indicated + by a ^ character after the colon. For example, + + [12[:^digit:]] + + matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the + POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but + these are not supported, and an error is given if they are encountered. + + By default, characters with values greater than 127 do not match any of + the POSIX character classes, although this may be different for charac- + ters in the range 128-255 when locale-specific matching is happening. + However, in UCP mode, unless certain options are set (see below), some + of the classes are changed so that Unicode character properties are + used. This is achieved by replacing POSIX classes with other sequences, + as follows: + + [:alnum:] becomes \p{Xan} + [:alpha:] becomes \p{L} + [:blank:] becomes \h + [:cntrl:] becomes \p{Cc} + [:digit:] becomes \p{Nd} + [:lower:] becomes \p{Ll} + [:space:] becomes \p{Xps} + [:upper:] becomes \p{Lu} + [:word:] becomes \p{Xwd} + + Negated versions, such as [:^alpha:] use \P instead of \p. Four other + POSIX classes are handled specially in UCP mode: + + [:graph:] This matches characters that have glyphs that mark the page + when printed. In Unicode property terms, it matches all char- + acters with the L, M, N, P, S, or Cf properties, except for: + + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s + + + [:print:] This matches the same characters as [:graph:] plus space + characters that are not controls, that is, characters with + the Zs property. + + [:punct:] This matches all characters that have the Unicode P (punctua- + tion) property, plus those characters with code points less + than 256 that have the S (Symbol) property. + + [:xdigit:] + In addition to the ASCII hexadecimal digits, this also + matches the "fullwidth" versions of those characters, whose + Unicode code points start at U+FF10. 
This is a change that + was made in PCRE2 release 10.43 for Perl compatibility. + + The other POSIX classes are unchanged by PCRE2_UCP, and match only + characters with code points less than 256. + + There are two options that can be used to restrict the POSIX classes to + ASCII characters when PCRE2_UCP is set. The option PCRE2_EX- + TRA_ASCII_DIGIT affects just [:digit:] and [:xdigit:]. Within a pat- + tern, this can be set and unset by (?aT) and (?-aT). The PCRE2_EX- + TRA_ASCII_POSIX option disables UCP processing for all POSIX classes, + including [:digit:] and [:xdigit:]. Within a pattern, (?aP) and (?-aP) + set and unset both these options for consistency. + + +COMPATIBILITY FEATURE FOR WORD BOUNDARIES + + In the POSIX.2 compliant library that was included in 4.4BSD Unix, the + ugly syntax [[:<:]] and [[:>:]] is used for matching "start of word" + and "end of word". PCRE2 treats these items as follows: + + [[:<:]] is converted to \b(?=\w) + [[:>:]] is converted to \b(?<=\w) + + Only these exact character sequences are recognized. A sequence such as + [a[:<:]b] provokes error for an unrecognized POSIX class name. This + support is not compatible with Perl. It is provided to help migrations + from other environments, and is best not used in any new patterns. Note + that \b matches at the start and the end of a word (see "Simple asser- + tions" above), and in a Perl-style pattern the preceding or following + character normally shows which is wanted, without the need for the as- + sertions that are used above in order to give exactly the POSIX behav- + iour. Note also that the PCRE2_UCP option changes the meaning of \w + (and therefore \b) by default, so it also affects these POSIX se- + quences. + + +VERTICAL BAR + + Vertical bar characters are used to separate alternative patterns. For + example, the pattern + + gilbert|sullivan + + matches either "gilbert" or "sullivan". 
Any number of alternatives may + appear, and an empty alternative is permitted (matching the empty + string). The matching process tries each alternative in turn, from left + to right, and the first one that succeeds is used. If the alternatives + are within a group (defined below), "succeeds" means matching the rest + of the main pattern as well as the alternative in the group. + + +INTERNAL OPTION SETTING + + The settings of several options can be changed within a pattern by a + sequence of letters enclosed between "(?" and ")". The following are + Perl-compatible, and are described in detail in the pcre2api documenta- + tion. The option letters are: + + i for PCRE2_CASELESS + m for PCRE2_MULTILINE + n for PCRE2_NO_AUTO_CAPTURE + s for PCRE2_DOTALL + x for PCRE2_EXTENDED + xx for PCRE2_EXTENDED_MORE + + For example, (?im) sets caseless, multiline matching. It is also possi- + ble to unset these options by preceding the relevant letters with a hy- + phen, for example (?-im). The two "extended" options are not indepen- + dent; unsetting either one cancels the effects of both of them. + + A combined setting and unsetting such as (?im-sx), which sets + PCRE2_CASELESS and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and + PCRE2_EXTENDED, is also permitted. Only one hyphen may appear in the + options string. If a letter appears both before and after the hyphen, + the option is unset. An empty options setting "(?)" is allowed. Need- + less to say, it has no effect. + + If the first character following (? is a circumflex, it causes all of + the above options to be unset. Letters may follow the circumflex to + cause some options to be re-instated, but a hyphen may not appear. 
+ + Some PCRE2-specific options can be changed by the same mechanism using + these pairs or individual letters: + + aD for PCRE2_EXTRA_ASCII_BSD + aS for PCRE2_EXTRA_ASCII_BSS + aW for PCRE2_EXTRA_ASCII_BSW + aP for PCRE2_EXTRA_ASCII_POSIX and PCRE2_EXTRA_ASCII_DIGIT + aT for PCRE2_EXTRA_ASCII_DIGIT + r for PCRE2_EXTRA_CASELESS_RESTRICT + J for PCRE2_DUPNAMES + U for PCRE2_UNGREEDY + + However, except for 'r', these are not unset by (?^), which is equiva- + lent to (?-imnrsx). If 'a' is not followed by any of the upper case + letters shown above, it sets (or unsets) all the ASCII options. + + PCRE2_EXTRA_ASCII_DIGIT has no additional effect when PCRE2_EX- + TRA_ASCII_POSIX is set, but including it in (?aP) means that (?-aP) + suppresses all ASCII restrictions for POSIX classes. + + When one of these option changes occurs at top level (that is, not in- + side group parentheses), the change applies until a subsequent change, + or the end of the pattern. An option change within a group (see below + for a description of groups) affects only that part of the group that + follows it. At the end of the group these options are reset to the + state they were before the group. For example, + + (a(?i)b)c + + matches abc and aBc and no other strings (assuming PCRE2_CASELESS is + not set externally). Any changes made in one alternative do carry on + into subsequent branches within the same group. For example, + + (a(?i)b|c) + + matches "ab", "aB", "c", and "C", even though when matching "C" the + first branch is abandoned before the option setting. This is because + the effects of option settings happen at compile time. There would be + some very weird behaviour otherwise. + + As a convenient shorthand, if any option settings are required at the + start of a non-capturing group (see the next section), the option let- + ters may appear between the "?" and the ":". Thus the two patterns + + (?i:saturday|sunday) + (?:(?i)saturday|sunday) + + match exactly the same set of strings. 
+ + Note: There are other PCRE2-specific options, applying to the whole + pattern, which can be set by the application when the compiling func- + tion is called. In addition, the pattern can contain special leading + sequences such as (*CRLF) to override what the application has set or + what has been defaulted. Details are given in the section entitled + "Newline sequences" above. There are also the (*UTF) and (*UCP) leading + sequences that can be used to set UTF and Unicode property modes; they + are equivalent to setting the PCRE2_UTF and PCRE2_UCP options, respec- + tively. However, the application can set the PCRE2_NEVER_UTF or + PCRE2_NEVER_UCP options, which lock out the use of the (*UTF) and + (*UCP) sequences. + + +GROUPS + + Groups are delimited by parentheses (round brackets), which can be + nested. Turning part of a pattern into a group does two things: + + 1. It localizes a set of alternatives. For example, the pattern + + cat(aract|erpillar|) + + matches "cataract", "caterpillar", or "cat". Without the parentheses, + it would match "cataract", "erpillar" or an empty string. + + 2. It creates a "capture group". This means that, when the whole pat- + tern matches, the portion of the subject string that matched the group + is passed back to the caller, separately from the portion that matched + the whole pattern. (This applies only to the traditional matching + function; the DFA matching function does not support capturing.) + + Opening parentheses are counted from left to right (starting from 1) to + obtain numbers for capture groups. For example, if the string "the red + king" is matched against the pattern + + the ((red|white) (king|queen)) + + the captured substrings are "red king", "red", and "king", and are num- + bered 1, 2, and 3, respectively. + + The fact that plain parentheses fulfil two functions is not always + helpful. There are often times when grouping is required without cap- + turing. 
If an opening parenthesis is followed by a question mark and a + colon, the group does not do any capturing, and is not counted when + computing the number of any subsequent capture groups. For example, if + the string "the white queen" is matched against the pattern + + the ((?:red|white) (king|queen)) + + the captured substrings are "white queen" and "queen", and are numbered + 1 and 2. The maximum number of capture groups is 65535. + + As a convenient shorthand, if any option settings are required at the + start of a non-capturing group, the option letters may appear between + the "?" and the ":". Thus the two patterns + + (?i:saturday|sunday) + (?:(?i)saturday|sunday) + + match exactly the same set of strings. Because alternative branches are + tried from left to right, and options are not reset until the end of + the group is reached, an option setting in one branch does affect sub- + sequent branches, so the above patterns match "SUNDAY" as well as "Sat- + urday". + + +DUPLICATE GROUP NUMBERS + + Perl 5.10 introduced a feature whereby each alternative in a group uses + the same numbers for its capturing parentheses. Such a group starts + with (?| and is itself a non-capturing group. For example, consider + this pattern: + + (?|(Sat)ur|(Sun))day + + Because the two alternatives are inside a (?| group, both sets of cap- + turing parentheses are numbered one. Thus, when the pattern matches, + you can look at captured substring number one, whichever alternative + matched. This construct is useful when you want to capture part, but + not all, of one of a number of alternatives. Inside a (?| group, paren- + theses are numbered as usual, but the number is reset at the start of + each branch. The numbers of any capturing parentheses that follow the + whole group start after the highest number used in any branch. The fol- + lowing example is taken from the Perl documentation. The numbers under- + neath show in which buffer the captured content will be stored. 
+ + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 + + A backreference to a capture group uses the most recent value that is + set for the group. The following pattern matches "abcabc" or "defdef": + + /(?|(abc)|(def))\1/ + + In contrast, a subroutine call to a capture group always refers to the + first one in the pattern with the given number. The following pattern + matches "abcabc" or "defabc": + + /(?|(abc)|(def))(?1)/ + + A relative reference such as (?-1) is no different: it is just a conve- + nient way of computing an absolute group number. + + If a condition test for a group's having matched refers to a non-unique + number, the test is true if any group with that number has matched. + + An alternative approach to using this "branch reset" feature is to use + duplicate named groups, as described in the next section. + + +NAMED CAPTURE GROUPS + + Identifying capture groups by number is simple, but it can be very hard + to keep track of the numbers in complicated patterns. Furthermore, if + an expression is modified, the numbers may change. To help with this + difficulty, PCRE2 supports the naming of capture groups. This feature + was not added to Perl until release 5.10. Python had the feature ear- + lier, and PCRE1 introduced it at release 4.0, using the Python syntax. + PCRE2 supports both the Perl and the Python syntax. + + In PCRE2, a capture group can be named in one of three ways: + (?<name>...) or (?'name'...) as in Perl, or (?P<name>...) as in Python. + Names may be up to 128 code units long. When PCRE2_UTF is not set, they + may contain only ASCII alphanumeric characters and underscores, but + must start with a non-digit. When PCRE2_UTF is set, the syntax of group + names is extended to allow any Unicode letter or Unicode decimal digit. 
+ In other words, group names must match one of these patterns: + + ^[_A-Za-z][_A-Za-z0-9]*\z when PCRE2_UTF is not set + ^[_\p{L}][_\p{L}\p{Nd}]*\z when PCRE2_UTF is set + + References to capture groups from other parts of the pattern, such as + backreferences, recursion, and conditions, can all be made by name as + well as by number. + + Named capture groups are allocated numbers as well as names, exactly as + if the names were not present. In both PCRE2 and Perl, capture groups + are primarily identified by numbers; any names are just aliases for + these numbers. The PCRE2 API provides function calls for extracting the + complete name-to-number translation table from a compiled pattern, as + well as convenience functions for extracting captured substrings by + name. + + Warning: When more than one capture group has the same number, as de- + scribed in the previous section, a name given to one of them applies to + all of them. Perl allows identically numbered groups to have different + names. Consider this pattern, where there are two capture groups, both + numbered 1: + + (?|(?<AA>aa)|(?<BB>bb)) + + Perl allows this, with both names AA and BB as aliases of group 1. + Thus, after a successful match, both names yield the same value (either + "aa" or "bb"). + + In an attempt to reduce confusion, PCRE2 does not allow the same group + number to be associated with more than one name. The example above pro- + vokes a compile-time error. However, there is still scope for confu- + sion. Consider this pattern: + + (?|(?<AA>aa)|(bb)) + + Although the second group number 1 is not explicitly named, the name AA + is still an alias for any group 1. Whether the pattern matches "aa" or + "bb", a reference by name to group AA yields the matched string. 
+ + By default, a name must be unique within a pattern, except that dupli- + cate names are permitted for groups with the same number, for example: + + (?|(?<AA>aa)|(?<AA>bb)) + + The duplicate name constraint can be disabled by setting the PCRE2_DUP- + NAMES option at compile time, or by the use of (?J) within the pattern, + as described in the section entitled "Internal Option Setting" above. + + Duplicate names can be useful for patterns where only one instance of + the named capture group can match. Suppose you want to match the name + of a weekday, either as a 3-letter abbreviation or as the full name, + and in both cases you want to extract the abbreviation. This pattern + (ignoring the line breaks) does the job: + + (?J) + (?<DN>Mon|Fri|Sun)(?:day)?| + (?<DN>Tue)(?:sday)?| + (?<DN>Wed)(?:nesday)?| + (?<DN>Thu)(?:rsday)?| + (?<DN>Sat)(?:urday)? + + There are five capture groups, but only one is ever set after a match. + The convenience functions for extracting the data by name return the + substring for the first (and in this example, the only) group of that + name that matched. This saves searching to find which numbered group it + was. (An alternative way of solving this problem is to use a "branch + reset" group, as described in the previous section.) + + If you make a backreference to a non-unique named group from elsewhere + in the pattern, the groups to which the name refers are checked in the + order in which they appear in the overall pattern. The first one that + is set is used for the reference. For example, this pattern matches + both "foofoo" and "barbar" but not "foobar" or "barfoo": + + (?J)(?:(?<n>foo)|(?<n>bar))\k<n> + + + If you make a subroutine call to a non-unique named group, the one that + corresponds to the first occurrence of the name is used. In the absence + of duplicate numbers this is the one with the lowest number. 
+ + If you use a named reference in a condition test (see the section about + conditions below), either to check whether a capture group has matched, + or to check for recursion, all groups with the same name are tested. If + the condition is true for any one of them, the overall condition is + true. This is the same behaviour as testing by number. For further de- + tails of the interfaces for handling named capture groups, see the + pcre2api documentation. + + +REPETITION + + Repetition is specified by quantifiers, which may follow any one of + these items: + + a literal data character + the dot metacharacter + the \C escape sequence + the \R escape sequence + the \X escape sequence + any escape sequence that matches a single character + a character class + a backreference + a parenthesized group (including lookaround assertions) + a subroutine call (recursive or otherwise) + + If a quantifier does not follow a repeatable item, an error occurs. The + general repetition quantifier specifies a minimum and maximum number of + permitted matches by giving two numbers in curly brackets (braces), + separated by a comma. The numbers must be less than 65536, and the + first must be less than or equal to the second. For example, + + z{2,4} + + matches "zz", "zzz", or "zzzz". A closing brace on its own is not a + special character. If the second number is omitted, but the comma is + present, there is no upper limit; if the second number and the comma + are both omitted, the quantifier specifies an exact number of required + matches. Thus + + [aeiou]{3,} + + matches at least 3 successive vowels, but may match many more, whereas + + \d{8} + + matches exactly 8 digits. If the first number is omitted, the lower + limit is taken as zero; in this case the upper limit must be present. + + X{,4} is interpreted as X{0,4} + + This is a change in behaviour that happened in Perl 5.34.0 and PCRE2 + 10.43. In earlier versions such a sequence was not interpreted as a + quantifier. 
Other regular expression engines may behave either way. + + If the characters that follow an opening brace do not match the syntax + of a quantifier, the brace is taken as a literal character. In particu- + lar, this means that {,} is a literal string of three characters. + + Note that not every opening brace is potentially the start of a quanti- + fier because braces are used in other items such as \N{U+345} or + \k{name}. + + In UTF modes, quantifiers apply to characters rather than to individual + code units. Thus, for example, \x{100}{2} matches two characters, each + of which is represented by a two-byte sequence in a UTF-8 string. Simi- + larly, \X{3} matches three Unicode extended grapheme clusters, each of + which may be several code units long (and they may be of different + lengths). + + The quantifier {0} is permitted, causing the expression to behave as if + the previous item and the quantifier were not present. This may be use- + ful for capture groups that are referenced as subroutines from else- + where in the pattern (but see also the section entitled "Defining cap- + ture groups for use by reference only" below). Except for parenthesized + groups, items that have a {0} quantifier are omitted from the compiled + pattern. + + For convenience, the three most common quantifiers have single-charac- + ter abbreviations: + + * is equivalent to {0,} + + is equivalent to {1,} + ? is equivalent to {0,1} + + It is possible to construct infinite loops by following a group that + can match no characters with a quantifier that has no upper limit, for + example: + + (a?)* + + Earlier versions of Perl and PCRE1 used to give an error at compile + time for such patterns. However, because there are cases where this can + be useful, such patterns are now accepted, but whenever an iteration of + such a group matches no characters, matching moves on to the next item + in the pattern instead of repeatedly matching an empty string. 
This + does not prevent backtracking into any of the iterations if a subse- + quent item fails to match. + + By default, quantifiers are "greedy", that is, they match as much as + possible (up to the maximum number of permitted repetitions), without + causing the rest of the pattern to fail. The classic example of where + this gives problems is in trying to match comments in C programs. These + appear between /* and */ and within the comment, individual * and / + characters may appear. An attempt to match C comments by applying the + pattern + + /\*.*\*/ + + to the string + + /* first comment */ not comment /* second comment */ + + fails, because it matches the entire string owing to the greediness of + the .* item. However, if a quantifier is followed by a question mark, + it ceases to be greedy, and instead matches the minimum number of times + possible, so the pattern + + /\*.*?\*/ + + does the right thing with C comments. The meaning of the various quan- + tifiers is not otherwise changed, just the preferred number of matches. + Do not confuse this use of question mark with its use as a quantifier + in its own right. Because it has two uses, it can sometimes appear + doubled, as in + + \d??\d + + which matches one digit by preference, but can match two if that is the + only way the rest of the pattern matches. + + If the PCRE2_UNGREEDY option is set (an option that is not available in + Perl), the quantifiers are not greedy by default, but individual ones + can be made greedy by following them with a question mark. In other + words, it inverts the default behaviour. + + When a parenthesized group is quantified with a minimum repeat count + that is greater than 1 or with a limited maximum, more memory is re- + quired for the compiled pattern, in proportion to the size of the mini- + mum or maximum. 
+ + If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option + (equivalent to Perl's /s) is set, thus allowing the dot to match new- + lines, the pattern is implicitly anchored, because whatever follows + will be tried against every character position in the subject string, + so there is no point in retrying the overall match at any position af- + ter the first. PCRE2 normally treats such a pattern as though it were + preceded by \A. + + In cases where it is known that the subject string contains no new- + lines, it is worth setting PCRE2_DOTALL in order to obtain this opti- + mization, or alternatively, using ^ to indicate anchoring explicitly. + + However, there are some cases where the optimization cannot be used. + When .* is inside capturing parentheses that are the subject of a + backreference elsewhere in the pattern, a match at the start may fail + where a later one succeeds. Consider, for example: + + (.*)abc\1 + + If the subject is "xyz123abc123" the match point is the fourth charac- + ter. For this reason, such a pattern is not implicitly anchored. + + Another case where implicit anchoring is not applied is when the lead- + ing .* is inside an atomic group. Once again, a match at the start may + fail where a later one succeeds. Consider this pattern: + + (?>.*?a)b + + It matches "ab" in the subject "aab". The use of the backtracking con- + trol verbs (*PRUNE) and (*SKIP) also disables this optimization. To + disable it explicitly, either pass the compile option + PCRE2_NO_DOTSTAR_ANCHOR, or call pcre2_set_optimize() with a + PCRE2_DOTSTAR_ANCHOR_OFF directive. + + When a capture group is repeated, the value captured is the substring + that matched the final iteration. For example, after + + (tweedle[dume]{3}\s*)+ + + has matched "tweedledum tweedledee" the value of the captured substring + is "tweedledee". However, if there are nested capture groups, the cor- + responding captured values may have been set in previous iterations. 
+ For example, after + + (a|(b))+ + + matches "aba" the value of the second captured substring is "b". + + +ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS + + With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") + repetition, failure of what follows normally causes the repeated item + to be re-evaluated to see if a different number of repeats allows the + rest of the pattern to match. Sometimes it is useful to prevent this, + either to change the nature of the match, or to cause it to fail earlier + than it otherwise might, when the author of the pattern knows there is + no point in carrying on. + + Consider, for example, the pattern \d+foo when applied to the subject + line + + 123456bar + + After matching all 6 digits and then failing to match "foo", the normal + action of the matcher is to try again with only 5 digits matching the + \d+ item, and then with 4, and so on, before ultimately failing. + "Atomic grouping" (a term taken from Jeffrey Friedl's book) provides + the means for specifying that once a group has matched, it is not to be + re-evaluated in this way. + + If we use atomic grouping for the previous example, the matcher gives + up immediately on failing to match "foo" the first time. The notation + is a kind of special parenthesis, starting with (?> as in this example: + + (?>\d+)foo + + Perl 5.28 introduced an experimental alphabetic form starting with (* + which may be easier to remember: + + (*atomic:\d+)foo + + This kind of parenthesized group "locks up" the part of the pattern it + contains once it has matched, and a failure further into the pattern is + prevented from backtracking into it. Backtracking past it to previous + items, however, works as normal. + + An alternative description is that a group of this type matches exactly + the string of characters that an identical standalone pattern would + match, if anchored at the current point in the subject string. + + Atomic groups are not capture groups.
Simple cases such as the above + example can be thought of as a maximizing repeat that must swallow + everything it can. So, while both \d+ and \d+? are prepared to adjust + the number of digits they match in order to make the rest of the pat- + tern match, (?>\d+) can only match an entire sequence of digits. + + Atomic groups in general can of course contain arbitrarily complicated + expressions, and can be nested. However, when the contents of an atomic + group is just a single repeated item, as in the example above, a sim- + pler notation, called a "possessive quantifier" can be used. This con- + sists of an additional + character following a quantifier. Using this + notation, the previous example can be rewritten as + + \d++foo + + Note that a possessive quantifier can be used with an entire group, for + example: + + (abc|xyz){2,3}+ + + Possessive quantifiers are always greedy; the setting of the PCRE2_UN- + GREEDY option is ignored. They are a convenient notation for the sim- + pler forms of atomic group. However, there is no difference in the + meaning of a possessive quantifier and the equivalent atomic group, + though there may be a performance difference; possessive quantifiers + should be slightly faster. + + The possessive quantifier syntax is an extension to the Perl 5.8 syn- + tax. Jeffrey Friedl originated the idea (and the name) in the first + edition of his book. Mike McCloskey liked it, so implemented it when he + built Sun's Java package, and PCRE1 copied it from there. It found its + way into Perl at release 5.10. + + PCRE2 has an optimization that automatically "possessifies" certain + simple pattern constructs. For example, the sequence A+B is treated as + A++B because there is no point in backtracking into a sequence of A's + when B must follow. This feature can be disabled by the + PCRE2_NO_AUTO_POSSESS option, by calling pcre2_set_optimize() with a + PCRE2_AUTO_POSSESS_OFF directive, or by starting the pattern with + (*NO_AUTO_POSSESS). 
+ + When a pattern contains an unlimited repeat inside a group that can it- + self be repeated an unlimited number of times, the use of an atomic + group is the only way to avoid some failing matches taking a very long + time indeed. The pattern + + (\D+|<\d+>)*[!?] + + matches an unlimited number of substrings that either consist of non- + digits, or digits enclosed in <>, followed by either ! or ?. When it + matches, it runs quickly. However, if it is applied to + + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + + it takes a long time before reporting failure. This is because the + string can be divided between the internal \D+ repeat and the external + * repeat in a large number of ways, and all have to be tried. (The ex- + ample uses [!?] rather than a single character at the end, because both + PCRE2 and Perl have an optimization that allows for fast failure when a + single character is used. They remember the last single character that + is required for a match, and fail early if it is not present in the + string.) If the pattern is changed so that it uses an atomic group, + like this: + + ((?>\D+)|<\d+>)*[!?] + + sequences of non-digits cannot be broken, and failure happens quickly. + + +BACKREFERENCES + + Outside a character class, a backslash followed by a digit greater than + 0 (and possibly further digits) is a backreference to a capture group + earlier (that is, to its left) in the pattern, provided there have been + that many previous capture groups. + + However, if the decimal number following the backslash is less than 8, + it is always taken as a backreference, and causes an error only if + there are not that many capture groups in the entire pattern. In other + words, the group that is referenced need not be to the left of the ref- + erence for numbers less than 8. A "forward backreference" of this type + can make sense when a repetition is involved and the group to the right + has participated in an earlier iteration. 
+ + It is not possible to have a numerical "forward backreference" to a + group whose number is 8 or more using this syntax because a sequence + such as \50 is interpreted as a character defined in octal. See the + subsection entitled "Non-printing characters" above for further details + of the handling of digits following a backslash. Other forms of back- + referencing do not suffer from this restriction. In particular, there + is no problem when named capture groups are used (see below). + + Another way of avoiding the ambiguity inherent in the use of digits + following a backslash is to use the \g escape sequence. This escape + must be followed by a signed or unsigned number, optionally enclosed in + braces. These examples are all identical: + + (ring), \1 + (ring), \g1 + (ring), \g{1} + + An unsigned number specifies an absolute reference without the ambigu- + ity that is present in the older syntax. It is also useful when literal + digits follow the reference. A signed number is a relative reference. + Consider this example: + + (abc(def)ghi)\g{-1} + + The sequence \g{-1} is a reference to the capture group whose number is + one less than the number of the next group to be started, so in this + example (where the next group would be numbered 3) it is equivalent to + \2, and \g{-2} would be equivalent to \1. Note that if this construct + is inside a capture group, that group is included in the count, so in + this example \g{-2} also refers to group 1: + + (A)(\g{-2}B) + + The use of relative references can be helpful in long patterns, and + also in patterns that are created by joining together fragments that + contain references within themselves. + + The sequence \g{+1} is a reference to the next capture group that is + started after this item, and \g{+2} refers to the one after that, and + so on. This kind of forward reference can be useful in patterns that + repeat. Perl does not support the use of + in this way.
+ + A backreference matches whatever actually most recently matched the + capture group in the current subject string, rather than anything at + all that matches the group (see "Groups as subroutines" below for a way + of doing that). So the pattern + + (sens|respons)e and \1ibility + + matches "sense and sensibility" and "response and responsibility", but + not "sense and responsibility". If caseful matching is in force at the + time of the backreference, the case of letters is relevant. For exam- + ple, + + ((?i)rah)\s+\1 + + matches "rah rah" and "RAH RAH", but not "RAH rah", even though the + original capture group is matched caselessly. + + There are several different ways of writing backreferences to named + capture groups. The .NET syntax is \k{name}, the Python syntax is + (?P=name), and the original Perl syntax is \k<name> or \k'name'. All of + these are now supported by both Perl and PCRE2. Perl 5.10's unified + backreference syntax, in which \g can be used for both numeric and + named references, is also supported by PCRE2. We could rewrite the + above example in any of the following ways: + + (?<p1>(?i)rah)\s+\k<p1> + (?'p1'(?i)rah)\s+\k{p1} + (?P<p1>(?i)rah)\s+(?P=p1) + (?<p1>(?i)rah)\s+\g{p1} + + A capture group that is referenced by name may appear in the pattern + before or after the reference. + + There may be more than one backreference to the same group. If a group + has not actually been used in a particular match, backreferences to it + always fail by default. For example, the pattern + + (a|(bc))\2 + + always fails if it starts to match "a" rather than "bc". However, if + the PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backref- + erence to an unset value matches an empty string. + + Because there may be many capture groups in a pattern, all digits fol- + lowing a backslash are taken as part of a potential backreference num- + ber. If the pattern continues with a digit character, some delimiter + must be used to terminate the backreference.
If the PCRE2_EXTENDED or + PCRE2_EXTENDED_MORE option is set, this can be white space. Otherwise, + the \g{} syntax or an empty comment (see "Comments" below) can be used. + + Recursive backreferences + + A backreference that occurs inside the group to which it refers fails + when the group is first used, so, for example, (a\1) never matches. + However, such references can be useful inside repeated groups. For ex- + ample, the pattern + + (a|b\1)+ + + matches any number of "a"s and also "aba", "ababbaa" etc. At each iter- + ation of the group, the backreference matches the character string cor- + responding to the previous iteration. In order for this to work, the + pattern must be such that the first iteration does not need to match + the backreference. This can be done using alternation, as in the exam- + ple above, or by a quantifier with a minimum of zero. + + For versions of PCRE2 less than 10.25, backreferences of this type used + to cause the group that they reference to be treated as an atomic + group. This restriction no longer applies, and backtracking into such + groups can occur as normal. + + +ASSERTIONS + + An assertion is a test that does not consume any characters. The test + must succeed for the match to continue. The simple assertions coded as + \b, \B, \A, \G, \Z, \z, ^ and $ are described above. + + More complicated assertions are coded as parenthesized groups. If + matching such a group succeeds, matching continues after it, but with + the matching position in the subject string reset to what it was before + the assertion was processed. + + A special kind of assertion, called a "scan substring" assertion, + matches a subpattern against a previously captured substring. This is + described in the section entitled "Scan substring assertions" below. It + is a PCRE2 extension, not compatible with Perl. 
+ + The other group-based assertions are of two kinds: those that look ahead + of the current position in the subject string, and those that look be- + hind it, and in each case an assertion may be positive (must match for + the assertion to be true) or negative (must not match for the assertion + to be true). + + The Perl-compatible lookaround assertions are atomic. If an assertion + is true, but there is a subsequent matching failure, there is no back- + tracking into the assertion. However, there are some cases where non- + atomic assertions can be useful. PCRE2 has some support for these, de- + scribed in the section entitled "Non-atomic assertions" below, but they + are not Perl-compatible. + + A lookaround assertion may appear as the condition in a conditional + group (see below). In this case, the result of matching the assertion + determines which branch of the condition is followed. + + Assertion groups are not capture groups. If an assertion contains cap- + ture groups within it, these are counted for the purposes of numbering + the capture groups in the whole pattern. Within each branch of an as- + sertion, locally captured substrings may be referenced in the usual + way. For example, a sequence such as (.)\g{-1} can be used to check + that two adjacent characters are the same. + + When a branch within an assertion fails to match, any substrings that + were captured are discarded (as happens with any pattern branch that + fails to match). A negative assertion is true only when all its + branches fail to match; this means that no captured substrings are ever + retained after a successful negative assertion. When an assertion con- + tains a matching branch, what happens depends on the type of assertion. + + For a positive assertion, internally captured substrings in the suc- + cessful branch are retained, and matching continues with the next pat- + tern item after the assertion.
For a negative assertion, a matching + branch means that the assertion is not true. If such an assertion is + being used as a condition in a conditional group (see below), captured + substrings are retained, because matching continues with the "no" + branch of the condition. For other failing negative assertions, control + passes to the previous backtracking point, thus discarding any captured + strings within the assertion. + + Most assertion groups may be repeated; though it makes no sense to as- + sert the same thing several times, the side effect of capturing in pos- + itive assertions may occasionally be useful. However, an assertion that + forms the condition for a conditional group may not be quantified. + PCRE2 used to restrict the repetition of assertions, but from release + 10.35 the only restriction is that an unlimited maximum repetition is + changed to be one more than the minimum. For example, {3,} is treated + as {3,4}. + + Alphabetic assertion names + + Traditionally, symbolic sequences such as (?= and (?<= have been used + to specify lookaround assertions. Perl 5.28 introduced some experimen- + tal alphabetic alternatives which might be easier to remember. They all + start with (* instead of (? and must be written using lower case let- + ters. PCRE2 supports the following synonyms: + + (*positive_lookahead: or (*pla: is the same as (?= + (*negative_lookahead: or (*nla: is the same as (?! + (*positive_lookbehind: or (*plb: is the same as (?<= + (*negative_lookbehind: or (*nlb: is the same as (? .*? \b\1\b ){2} + + For a subject such as "word1 word2 word3 word2 word3 word4" the result + is "word3". How does it work? At the start, ^(?x) anchors the pattern + and sets the "x" option, which causes white space (introduced for read- + ability) to be ignored. Inside the assertion, the greedy .* at first + consumes the entire string, but then has to backtrack until the rest of + the assertion can match a word, which is captured by group 1. 
In other + words, when the assertion first succeeds, it captures the right-most + word in the string. + + The current matching point is then reset to the start of the subject, + and the rest of the pattern match checks for two occurrences of the + captured word, using an ungreedy .*? to scan from the left. If this + succeeds, we are done, but if the last word in the string does not oc- + cur twice, this part of the pattern fails. If a traditional atomic + lookahead (?= or (*pla: had been used, the assertion could not be re- + entered, and the whole match would fail. The pattern would succeed only + if the very last word in the subject was found twice. + + Using a non-atomic lookahead, however, means that when the last word + does not occur twice in the string, the lookahead can backtrack and + find the second-last word, and so on, until either the match succeeds, + or all words have been tested. + + Two conditions must be met for a non-atomic assertion to be useful: the + contents of one or more capturing groups must change after a backtrack + into the assertion, and there must be a backreference to a changed + group later in the pattern. If this is not the case, the rest of the + pattern match fails exactly as before because nothing has changed, so + using a non-atomic assertion just wastes resources. + + There is one exception to backtracking into a non-atomic assertion. If + an (*ACCEPT) control verb is triggered, the assertion succeeds atomi- + cally. That is, a subsequent match failure cannot backtrack into the + assertion. + + Non-atomic assertions are not supported by the alternative matching + function pcre2_dfa_match(). They are supported by JIT, but only if they + do not contain any control verbs such as (*ACCEPT). (This may change in + future). Note that assertions that appear as conditions for conditional + groups (see below) must be atomic. 
+ + +SCAN SUBSTRING ASSERTIONS + + A special kind of assertion, not compatible with Perl, makes it possi- + ble to check the contents of a captured substring by matching it with a + subpattern. Because this involves capturing, this feature is not sup- + ported by pcre2_dfa_match(). + + A scan substring assertion starts with the sequence (*scan_substring: + or (*scs: which is followed by a list of substring numbers (absolute or + relative) and/or substring names enclosed in single quotes or angle + brackets, all within parentheses. The rest of the item is the subpat- + tern that is applied to the substring, as shown in these examples: + + (*scan_substring:(1)...) + (*scs:(-2)...) + (*scs:('AB')...) + (*scs:(1,'AB',-2)...) + + The list of groups is checked in the order they are given, and it is + the contents of the first one that is found to be set that are scanned. + When PCRE2_DUPNAMES is set and there are ambiguous group names, all + groups with the same name are checked in numerical order. A scan sub- + string assertion fails if none of the groups it references have been + set. + + The pattern match on the substring is always anchored, that is, it must + match from the start of the substring. There is no "bumpalong" if it + does not match at the start. The end of the subject is temporarily re- + set to be the end of the substring, so \Z, \z, and $ will match there. + However, the start of the subject is not reset. This means that ^ + matches only if the substring is actually at the start of the main sub- + ject, but it also means that lookbehind assertions into what precedes + the substring are possible. + + Here is a very simple example: find a word that contains the rare (in + English) sequence of letters "rh" not at the start: + + \b(\w++)(*scs:(1).+rh) + + The first group captures a word which is then scanned by the second + group. 
This example does not actually need this heavyweight feature; + the same match can be achieved with: + + \b\w+?rh\w*\b + + When things are more complicated, however, scanning a captured sub- + string can be a useful way to describe the required match. For example, + there is a rather complicated pattern in the PCRE2 test data that + checks an entire subject string for a palindrome, that is, the sequence + of letters is the same in both directions. Suppose you want to search + for individual words of two or more characters such as "level" that are + palindromes: + + (\b\w{2,}+\b)(*scs:(1)...palindrome-matching-pattern...) + + Within a substring scanning subpattern, references to other groups work + as normal. Capturing groups may appear, and will retain their values + during ongoing matching if the assertion succeeds. + + +SCRIPT RUNS + + In concept, a script run is a sequence of characters that are all from + the same Unicode script such as Latin or Greek. However, because some + scripts are commonly used together, and because some diacritical and + other marks are used with multiple scripts, it is not that simple. + There is a full description of the rules that PCRE2 uses in the section + entitled "Script Runs" in the pcre2unicode documentation. + + If part of a pattern is enclosed between (*script_run: or (*sr: and a + closing parenthesis, it fails if the sequence of characters that it + matches are not a script run. After a failure, normal backtracking oc- + curs. Script runs can be used to detect spoofing attacks using charac- + ters that look the same, but are from different scripts. The string + "paypal.com" is an infamous example, where the letters could be a mix- + ture of Latin and Cyrillic.
This pattern ensures that the matched char- + acters in a sequence of non-spaces that follow white space are a script + run: + + \s+(*sr:\S+) + + To be sure that they are all from the Latin script (for example), a + lookahead can be used: + + \s+(?=\p{Latin})(*sr:\S+) + + This works as long as the first character is expected to be a character + in that script, and not (for example) punctuation, which is allowed + with any script. If this is not the case, a more creative lookahead is + needed. For example, if digits, underscore, and dots are permitted at + the start: + + \s+(?=[0-9_.]*\p{Latin})(*sr:\S+) + + + In many cases, backtracking into a script run pattern fragment is not + desirable. The script run can employ an atomic group to prevent this. + Because this is a common requirement, a shorthand notation is provided + by (*atomic_script_run: or (*asr: + + (*asr:...) is the same as (*sr:(?>...)) + + Note that the atomic group is inside the script run. Putting it outside + would not prevent backtracking into the script run pattern. + + Support for script runs is not available if PCRE2 is compiled without + Unicode support. A compile-time error is given if any of the above con- + structs is encountered. Script runs are not supported by the alternate + matching function, pcre2_dfa_match() because they use the same mecha- + nism as capturing parentheses. + + Warning: The (*ACCEPT) control verb (see below) should not be used + within a script run group, because it causes an immediate exit from the + group, bypassing the script run checking. + + +CONDITIONAL GROUPS + + It is possible to cause the matching process to obey a pattern fragment + conditionally or to choose between two alternative fragments, depending + on the result of an assertion, or whether a specific capture group has + already been matched. 
The two possible forms of conditional group are: + + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) + + If the condition is satisfied, the yes-pattern is used; otherwise the + no-pattern (if present) is used. An absent no-pattern is equivalent to + an empty string (it always matches). If there are more than two alter- + natives in the group, a compile-time error occurs. Each of the two al- + ternatives may itself contain nested groups of any form, including con- + ditional groups; the restriction to two alternatives applies only at + the level of the condition itself. This pattern fragment is an example + where the alternatives are complex: + + (?(1) (A|B|C) | (D | (?(2)E|F) | E) ) + + + There are five kinds of condition: references to capture groups, refer- + ences to recursion, two pseudo-conditions called DEFINE and VERSION, + and assertions. + + Checking for a used capture group by number + + If the text between the parentheses consists of a sequence of digits, + the condition is true if a capture group of that number has previously + matched. If there is more than one capture group with the same number + (see the earlier section about duplicate group numbers), the condition + is true if any of them have matched. An alternative notation, which is + a PCRE2 extension, not supported by Perl, is to precede the digits with + a plus or minus sign. In this case, the group number is relative rather + than absolute. The most recently opened capture group (which could be + enclosing this condition) can be referenced by (?(-1), the next most + recent by (?(-2), and so on. Inside loops it can also make sense to re- + fer to subsequent groups. The next capture group to be opened can be + referenced as (?(+1), and so on. The value zero in any of these forms + is not used; it provokes a compile-time error. 
+ + Consider the following pattern, which contains non-significant white + space to make it more readable (assume the PCRE2_EXTENDED option) and + to divide it into three parts for ease of discussion: + + ( \( )? [^()]+ (?(1) \) ) + + The first part matches an optional opening parenthesis, and if that + character is present, sets it as the first captured substring. The sec- + ond part matches one or more characters that are not parentheses. The + third part is a conditional group that tests whether or not the first + capture group matched. If it did, that is, if the subject started with an + opening parenthesis, the condition is true, and so the yes-pattern is + executed and a closing parenthesis is required. Otherwise, since no- + pattern is not present, the conditional group matches nothing. In other + words, this pattern matches a sequence of non-parentheses, optionally + enclosed in parentheses. + + If you were embedding this pattern in a larger one, you could use a + relative reference: + + ...other stuff... ( \( )? [^()]+ (?(-1) \) ) ... + + This makes the fragment independent of the parentheses in the larger + pattern. + + Checking for a used capture group by name + + Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a + used capture group by name. For compatibility with earlier versions of + PCRE1, which had this facility before Perl, the syntax (?(name)...) is + also recognized. Note, however, that undelimited names consisting of + the letter R followed by digits are ambiguous (see the following sec- + tion). Rewriting the above example to use a named group gives this: + + (?<OPEN> \( )? [^()]+ (?(<OPEN>) \) ) + + If the name used in a condition of this kind is a duplicate, the test + is applied to all groups of the same name, and is true if any one of + them has matched. + + Checking for pattern recursion + + "Recursion" in this sense refers to any subroutine-like call from one + part of the pattern to another, whether or not it is actually recur- + sive.
See the sections entitled "Recursive patterns" and "Groups as + subroutines" below for details of recursion and subroutine calls. + + If a condition is the string (R), and there is no capture group with + the name R, the condition is true if matching is currently in a recur- + sion or subroutine call to the whole pattern or any capture group. If + digits follow the letter R, and there is no group with that name, the + condition is true if the most recent call is into a group with the + given number, which must exist somewhere in the overall pattern. This + is a contrived example that is equivalent to a+b: + + ((?(R1)a+|(?1)b)) + + However, in both cases, if there is a capture group with a matching + name, the condition tests for its being set, as described in the sec- + tion above, instead of testing for recursion. For example, creating a + group with the name R1 by adding (?<R1>) to the above pattern com- + pletely changes its meaning. + + If a name preceded by ampersand follows the letter R, for example: + + (?(R&name)...) + + the condition is true if the most recent recursion is into a group of + that name (which must exist within the pattern). + + This condition does not check the entire recursion stack. It tests only + the current level. If the name used in a condition of this kind is a + duplicate, the test is applied to all groups of the same name, and is + true if any one of them is the most recent recursion. + + At "top level", all these recursion test conditions are false. + + Defining capture groups for use by reference only + + If the condition is the string (DEFINE), the condition is always false, + even if there is a group with the name DEFINE. In this case, there may + be only one alternative in the rest of the conditional group. It is al- + ways skipped if control reaches this point in the pattern; the idea of + DEFINE is that it can be used to define subroutines that can be refer- + enced from elsewhere. (The use of subroutines is described below.)
For + example, a pattern to match an IPv4 address such as "192.168.23.245" + could be written like this (ignore white space and line breaks): + + (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) ) + \b (?&byte) (\.(?&byte)){3} \b + + The first part of the pattern is a DEFINE group inside which another + group named "byte" is defined. This matches an individual component of + an IPv4 address (a number less than 256). When matching takes place, + this part of the pattern is skipped because DEFINE acts like a false + condition. The rest of the pattern uses references to the named group + to match the four dot-separated components of an IPv4 address, insist- + ing on a word boundary at each end. + + Checking the PCRE2 version + + Programs that link with a PCRE2 library can check the version by call- + ing pcre2_config() with appropriate arguments. Users of applications + that do not have access to the underlying code cannot do this. A spe- + cial "condition" called VERSION exists to allow such users to discover + which version of PCRE2 they are dealing with by using this condition to + match a string such as "yesno". VERSION must be followed either by "=" + or ">=" and a version number. For example: + + (?(VERSION>=10.4)yes|no) + + This pattern matches "yes" if the PCRE2 version is greater than or equal to + 10.4, or "no" otherwise. The fractional part of the version number may + not contain more than two digits. + + Assertion conditions + + If the condition is not in any of the above formats, it must be a + parenthesized assertion. This may be a positive or negative lookahead + or lookbehind assertion. However, it must be a traditional atomic as- + sertion, not one of the non-atomic assertions.
+ + Consider this pattern, again containing non-significant white space, + and with the two alternatives on the second line: + + (?(?=[^a-z]*[a-z]) + \d{2}-[a-z]{3}-\d{2} | \d{2}-\d{2}-\d{2} ) + + The condition is a positive lookahead assertion that matches an op- + tional sequence of non-letters followed by a letter. In other words, it + tests for the presence of at least one letter in the subject. If a let- + ter is found, the subject is matched against the first alternative; + otherwise it is matched against the second. This pattern matches + strings in one of the two forms dd-aaa-dd or dd-dd-dd, where aaa are + letters and dd are digits. + + When an assertion that is a condition contains capture groups, any cap- + turing that occurs in a matching branch is retained afterwards, for + both positive and negative assertions, because matching always contin- + ues after the assertion, whether it succeeds or fails. (Compare non- + conditional assertions, for which captures are retained only for posi- + tive assertions that succeed.) + + +COMMENTS + + There are two ways of including comments in patterns that are processed + by PCRE2. In both cases, the start of the comment must not be in a + character class, nor in the middle of any other sequence of related + characters such as (?: or a group name or number or a Unicode property + name. The characters that make up a comment play no part in the pattern + matching. + + The sequence (?# marks the start of a comment that continues up to the + next closing parenthesis. Nested parentheses are not permitted. If the + PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # + character also introduces a comment, which in this case continues to + immediately after the next newline character or character sequence in + the pattern. 
Which characters are interpreted as newlines is controlled + by an option passed to the compiling function or by a special sequence + at the start of the pattern, as described in the section entitled "New- + line conventions" above. Note that the end of this type of comment is a + literal newline sequence in the pattern; escape sequences that happen + to represent a newline do not count. For example, consider this pattern + when PCRE2_EXTENDED is set, and the default newline convention (a sin- + gle linefeed character) is in force: + + abc #comment \n still comment + + On encountering the # character, pcre2_compile() skips along, looking + for a newline in the pattern. The sequence \n is still literal at this + stage, so it does not terminate the comment. Only an actual character + with the code value 0x0a (the default newline) does so. + + +RECURSIVE PATTERNS + + Consider the problem of matching a string in parentheses, allowing for + unlimited nested parentheses. Without the use of recursion, the best + that can be done is to use a pattern that matches up to some fixed + depth of nesting. It is not possible to handle an arbitrary nesting + depth. + + For some time, Perl has provided a facility that allows regular expres- + sions to recurse (amongst other things). It does this by interpolating + Perl code in the expression at run time, and the code can refer to the + expression itself. A Perl pattern using code interpolation to solve the + parentheses problem can be created like this: + + $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x; + + The (?p{...}) item interpolates Perl code at run time, and in this case + refers recursively to the pattern in which it appears. + + Obviously, PCRE2 cannot support the interpolation of Perl code. In- + stead, it supports special syntax for recursion of the entire pattern, + and also for individual capture group recursion. 
After its introduction + in PCRE1 and Python, this kind of recursion was subsequently introduced + into Perl at release 5.10. + + A special item that consists of (? followed by a number greater than + zero and a closing parenthesis is a recursive subroutine call of the + capture group of the given number, provided that it occurs inside that + group. (If not, it is a non-recursive subroutine call, which is de- + scribed in the next section.) The special item (?R) or (?0) is a recur- + sive call of the entire regular expression. + + This PCRE2 pattern solves the nested parentheses problem (assume the + PCRE2_EXTENDED option is set so that white space is ignored): + + \( ( [^()]++ | (?R) )* \) + + First it matches an opening parenthesis. Then it matches any number of + substrings which can either be a sequence of non-parentheses, or a re- + cursive match of the pattern itself (that is, a correctly parenthesized + substring). Finally there is a closing parenthesis. Note the use of a + possessive quantifier to avoid backtracking into sequences of non- + parentheses. + + If this were part of a larger pattern, you would not want to recurse + the entire pattern, so instead you could use this: + + ( \( ( [^()]++ | (?1) )* \) ) + + We have put the pattern into parentheses, and caused the recursion to + refer to them instead of the whole pattern. + + In a larger pattern, keeping track of parenthesis numbers can be + tricky. This is made easier by the use of relative references. Instead + of (?1) in the pattern above you can write (?-2) to refer to the second + most recently opened parentheses preceding the recursion. In other + words, a negative number counts capturing parentheses leftwards from + the point at which it is encountered. + + Be aware however, that if duplicate capture group numbers are in use, + relative references refer to the earliest group with the appropriate + number. 
Consider, for example: + + (?|(a)|(b)) (c) (?-2) + + The first two capture groups (a) and (b) are both numbered 1, and group + (c) is number 2. When the reference (?-2) is encountered, the second + most recently opened parentheses has the number 1, but it is the first + such group (the (a) group) to which the recursion refers. This would be + the same if an absolute reference (?1) was used. In other words, rela- + tive references are just a shorthand for computing a group number. + + It is also possible to refer to subsequent capture groups, by writing + references such as (?+2). However, these cannot be recursive because + the reference is not inside the parentheses that are referenced. They + are always non-recursive subroutine calls, as described in the next + section. + + An alternative approach is to use named parentheses. The Perl syntax + for this is (?&name); PCRE1's earlier syntax (?P>name) is also sup- + ported. We could rewrite the above example as follows: + + (?<pn> \( ( [^()]++ | (?&pn) )* \) ) + + If there is more than one group with the same name, the earliest one is + used. + + The example pattern that we have been looking at contains nested unlim- + ited repeats, and so the use of a possessive quantifier for matching + strings of non-parentheses is important when applying the pattern to + strings that do not match. For example, when this pattern is applied to + + (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() + + it yields "no match" quickly. However, if a possessive quantifier is + not used, the match runs for a very long time indeed because there are + so many different ways the + and * repeats can carve up the subject, + and all have to be tested before failure can be reported. + + At the end of a match, the values of capturing parentheses are those + from the outermost level. If you want to obtain intermediate values, a + callout function can be used (see below and the pcre2callout documenta- + tion).
If the pattern above is matched against + + (ab(cd)ef) + + the value for the inner capturing parentheses (numbered 2) is "ef", + which is the last value taken on at the top level. If a capture group + is not matched at the top level, its final captured value is unset, + even if it was (temporarily) set at a deeper level during the matching + process. + + Do not confuse the (?R) item with the condition (R), which tests for + recursion. Consider this pattern, which matches text in angle brack- + ets, allowing for arbitrary nesting. Only digits are allowed in nested + brackets (that is, when recursing), whereas any characters are permit- + ted at the outer level. + + < (?: (?(R) \d++ | [^<>]*+) | (?R)) * > + + In this pattern, (?(R) is the start of a conditional group, with two + different alternatives for the recursive and non-recursive cases. The + (?R) item is the actual recursive call. + + Differences in recursion processing between PCRE2 and Perl + + Some former differences between PCRE2 and Perl no longer exist. + + Before release 10.30, recursion processing in PCRE2 differed from Perl + in that a recursive subroutine call was always treated as an atomic + group. That is, once it had matched some of the subject string, it was + never re-entered, even if it contained untried alternatives and there + was a subsequent matching failure. (Historical note: PCRE implemented + recursion before Perl did.) + + Starting with release 10.30, recursive subroutine calls are no longer + treated as atomic. That is, they can be re-entered to try unused alter- + natives if there is a matching failure later in the pattern. This is + now compatible with the way Perl works. If you want a subroutine call + to be atomic, you must explicitly enclose it in an atomic group. + + Supporting backtracking into recursions simplifies certain types of re- + cursive pattern. 
For example, this pattern matches palindromic strings: + + ^((.)(?1)\2|.?)$ + + The second branch in the group matches a single central character in + the palindrome when there are an odd number of characters, or nothing + when there are an even number of characters, but in order to work it + has to be able to try the second case when the rest of the pattern + match fails. If you want to match typical palindromic phrases, the pat- + tern has to ignore all non-word characters, which can be done like + this: + + ^\W*+((.)\W*+(?1)\W*+\2|\W*+.?)\W*+$ + + If run with the PCRE2_CASELESS option, this pattern matches phrases + such as "A man, a plan, a canal: Panama!". Note the use of the posses- + sive quantifier *+ to avoid backtracking into sequences of non-word + characters. Without this, PCRE2 takes a great deal longer (ten times or + more) to match typical phrases, and Perl takes so long that you think + it has gone into a loop. + + Another way in which PCRE2 and Perl used to differ in their recursion + processing is in the handling of captured values. Formerly in Perl, + when a group was called recursively or as a subroutine (see the next + section), it had no access to any values that were captured outside the + recursion, whereas in PCRE2 these values can be referenced. Consider + this pattern: + + ^(.)(\1|a(?2)) + + This pattern matches "bab". The first capturing parentheses match "b", + then in the second group, when the backreference \1 fails to match "b", + the second alternative matches "a" and then recurses. In the recursion, + \1 does now match "b" and so the whole match succeeds. This match used + to fail in Perl, but in later versions (I tried 5.024) it now works. + + +GROUPS AS SUBROUTINES + + If the syntax for a recursive group call (either by number or by name) + is used outside the parentheses to which it refers, it operates a bit + like a subroutine in a programming language. 
More accurately, PCRE2 + treats the referenced group as an independent subpattern which it tries + to match at the current matching position. The called group may be de- + fined before or after the reference. A numbered reference can be ab- + solute or relative, as in these examples: + + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... + + An earlier example pointed out that the pattern + + (sens|respons)e and \1ibility + + matches "sense and sensibility" and "response and responsibility", but + not "sense and responsibility". If instead the pattern + + (sens|respons)e and (?1)ibility + + is used, it does match "sense and responsibility" as well as the other + two strings. Another example is given in the discussion of DEFINE + above. + + Like recursions, subroutine calls used to be treated as atomic, but + this changed at PCRE2 release 10.30, so backtracking into subroutine + calls can now occur. However, any capturing parentheses that are set + during the subroutine call revert to their previous values afterwards. + + Processing options such as case-independence are fixed when a group is + defined, so if it is used as a subroutine, such options cannot be + changed for different calls. For example, consider this pattern: + + (abc)(?i:(?-1)) + + It matches "abcabc". It does not match "abcABC" because the change of + processing option does not affect the called group. + + The behaviour of backtracking control verbs in groups when called as + subroutines is described in the section entitled "Backtracking verbs in + subroutines" below. + + +ONIGURUMA SUBROUTINE SYNTAX + + For compatibility with Oniguruma, the non-Perl syntax \g followed by a + name or a number enclosed either in angle brackets or single quotes, is + an alternative syntax for calling a group as a subroutine, possibly re- + cursively. Here are two of the examples used above, rewritten using + this syntax: + + (?<pn>
\( ( (?>[^()]+) | \g<pn> )* \) ) + (sens|respons)e and \g'1'ibility + + PCRE2 supports an extension to Oniguruma: if a number is preceded by a + plus or a minus sign it is taken as a relative reference. For example: + + (abc)(?i:\g<-1>) + + Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not + synonymous. The former is a backreference; the latter is a subroutine + call. + + +CALLOUTS + + Perl has a feature whereby using the sequence (?{...}) causes arbitrary + Perl code to be obeyed in the middle of matching a regular expression. + This makes it possible, amongst other things, to extract different sub- + strings that match the same pair of parentheses when there is a repeti- + tion. + + PCRE2 provides a similar feature, but of course it cannot obey arbi- + trary Perl code. The feature is called "callout". The caller of PCRE2 + provides an external function by putting its entry point in a match + context using the function pcre2_set_callout(), and then passing that + context to pcre2_match() or pcre2_dfa_match(). If no match context is + passed, or if the callout entry point is set to NULL, callout points + will be passed over silently during matching. To disallow callouts in + the pattern syntax, you may use the PCRE2_EXTRA_NEVER_CALLOUT option. + + Within a regular expression, (?C<arg>) indicates a point at which the + external function is to be called. There are two kinds of callout: + those with a numerical argument and those with a string argument. (?C) + on its own with no argument is treated as (?C0). A numerical argument + allows the application to distinguish between different callouts. + String arguments were added for release 10.20 to make it possible for + script languages that use PCRE2 to embed short scripts within patterns + in a similar way to Perl. + + During matching, when PCRE2 reaches a callout point, the external func- + tion is called.
It is provided with the number or string argument of + the callout, the position in the pattern, and one item of data that is + also set in the match block. The callout function may cause matching to + proceed, to backtrack, or to fail. + + By default, PCRE2 implements a number of optimizations at matching + time, and one side-effect is that sometimes callouts are skipped. If + you need all possible callouts to happen, you need to set options that + disable the relevant optimizations. More details, including a complete + description of the programming interface to the callout function, are + given in the pcre2callout documentation. + + Callouts with numerical arguments + + If you just want to have a means of identifying different callout + points, put a number less than 256 after the letter C. For example, + this pattern has two callout points: + + (?C1)abc(?C2)def + + If the PCRE2_AUTO_CALLOUT flag is passed to pcre2_compile(), numerical + callouts are automatically installed before each item in the pattern. + They are all numbered 255. If there is a conditional group in the pat- + tern whose condition is an assertion, an additional callout is inserted + just before the condition. An explicit callout may also be set at this + position, as in this example: + + (?(?C9)(?=a)abc|def) + + Note that this applies only to assertion conditions, not to other types + of condition. + + Callouts with string arguments + + A delimited string may be used instead of a number as a callout argu- + ment. The starting delimiter must be one of ` ' " ^ % # $ { and the + ending delimiter is the same as the start, except for {, where the end- + ing delimiter is }. If the ending delimiter is needed within the + string, it must be doubled. For example: + + (?C'ab ''c'' d')xyz(?C{any text})pqr + + The doubling is removed before the string is passed to the callout + function. 
+ + +BACKTRACKING CONTROL + + There are a number of special "Backtracking Control Verbs" (to use + Perl's terminology) that modify the behaviour of backtracking during + matching. They are generally of the form (*VERB) or (*VERB:NAME). Some + verbs take either form, and may behave differently depending on whether + or not a name argument is present. The names are not required to be + unique within the pattern. + + By default, for compatibility with Perl, a name is any sequence of + characters that does not include a closing parenthesis. The name is not + processed in any way, and it is not possible to include a closing + parenthesis in the name. This can be changed by setting the + PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati- + ble. + + When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to + verb names and only an unescaped closing parenthesis terminates the + name. However, the only backslash items that are permitted are \Q, \E, + and sequences such as \x{100} that define character code points. Char- + acter type escapes such as \d are faulted. + + A closing parenthesis can be included in a name either as \) or between + \Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED + or PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb + names is skipped, and #-comments are recognized, exactly as in the rest + of the pattern. PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect + verb names unless PCRE2_ALT_VERBNAMES is also set. + + The maximum length of a name is 255 in the 8-bit library and 65535 in + the 16-bit and 32-bit libraries. If the name is empty, that is, if the + closing parenthesis immediately follows the colon, the effect is as if + the colon were not there. Any number of these verbs may occur in a pat- + tern. Except for (*ACCEPT), they may not be quantified. 
+ + Since these verbs are specifically related to backtracking, most of + them can be used only when the pattern is to be matched using the tra- + ditional matching function or JIT, because they use backtracking algo- + rithms. With the exception of (*FAIL), which behaves like a failing + negative assertion, the backtracking control verbs cause an error if + encountered by the DFA matching function. + + The behaviour of these verbs in repeated groups, assertions, and in + capture groups called as subroutines (whether or not recursively) is + documented below. + + Optimizations that affect backtracking verbs + + PCRE2 contains some optimizations that are used to speed up matching by + running some checks at the start of each match attempt. For example, it + may know the minimum length of matching subject, or that a particular + character must be present. When one of these optimizations bypasses the + running of a match, any included backtracking verbs will not, of + course, be processed. You can suppress the start-of-match optimizations + by setting the PCRE2_NO_START_OPTIMIZE option when calling pcre2_com- + pile(), by calling pcre2_set_optimize() with a PCRE2_START_OPTIMIZE_OFF + directive, or by starting the pattern with (*NO_START_OPT). There is + more discussion of this option in the section entitled "Compiling a + pattern" in the pcre2api documentation. + + Experiments with Perl suggest that it too has similar optimizations, + and like PCRE2, turning them off can change the result of a match. + + Verbs that act immediately + + The following verbs act as soon as they are encountered. + + (*ACCEPT) or (*ACCEPT:NAME) + + This verb causes the match to end successfully, skipping the remainder + of the pattern. However, when it is inside a capture group that is + called as a subroutine, only that group is ended successfully. Matching + then continues at the outer level. 
If (*ACCEPT) is triggered in a posi- + tive assertion, the assertion succeeds; in a negative assertion, the + assertion fails. + + If (*ACCEPT) is inside capturing parentheses, the data so far is cap- + tured. For example: + + A((?:A|B(*ACCEPT)|C)D) + + This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap- + tured by the outer parentheses. + + (*ACCEPT) is the only backtracking verb that is allowed to be quanti- + fied because an ungreedy quantification with a minimum of zero acts + only when a backtrack happens. Consider, for example, + + (A(*ACCEPT)??B)C + + where A, B, and C may be complex expressions. After matching "A", the + matcher processes "BC"; if that fails, causing a backtrack, (*ACCEPT) + is triggered and the match succeeds. In both cases, all but C is cap- + tured. Whereas (*COMMIT) (see below) means "fail on backtrack", a re- + peated (*ACCEPT) of this type means "succeed on backtrack". + + Warning: (*ACCEPT) should not be used within a script run group, be- + cause it causes an immediate exit from the group, bypassing the script + run checking. + + (*FAIL) or (*FAIL:NAME) + + This verb causes a matching failure, forcing backtracking to occur. It + may be abbreviated to (*F). It is equivalent to (?!) but easier to + read. The Perl documentation notes that it is probably useful only when + combined with (?{}) or (??{}). Those are, of course, Perl features that + are not present in PCRE2. The nearest equivalent is the callout fea- + ture, as for example in this pattern: + + a+(?C)(*FAIL) + + A match with the string "aaaa" always fails, but the callout is taken + before each backtrack happens (in this example, 10 times). + + (*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*AC- + CEPT) and (*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is + recorded just before the verb acts.
+ + Recording which path was taken + + There is one verb whose main purpose is to track how a match was ar- + rived at, though it also has a secondary use in conjunction with ad- + vancing the match starting point (see (*SKIP) below). + + (*MARK:NAME) or (*:NAME) + + A name is always required with this verb. For all the other backtrack- + ing control verbs, a NAME argument is optional. + + When a match succeeds, the name of the last-encountered mark name on + the matching path is passed back to the caller as described in the sec- + tion entitled "Other information about the match" in the pcre2api docu- + mentation. This applies to all instances of (*MARK) and other verbs, + including those inside assertions and atomic groups. However, there are + differences in those cases when (*MARK) is used in conjunction with + (*SKIP) as described below. + + The mark name that was last encountered on the matching path is passed + back. A verb without a NAME argument is ignored for this purpose. Here + is an example of pcre2test output, where the "mark" modifier requests + the retrieval and outputting of (*MARK) data: + + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XY + 0: XY + MK: A + XZ + 0: XZ + MK: B + + The (*MARK) name is tagged with "MK:" in this output, and in this exam- + ple it indicates which of the two alternatives matched. This is a more + efficient way of obtaining this information than putting each alterna- + tive in its own capturing parentheses. + + If a verb with a name is encountered in a positive assertion that is + true, the name is recorded and passed back if it is the last-encoun- + tered. This does not happen for negative assertions or failing positive + assertions. + + After a partial match or a failed match, the last encountered name in + the entire match process is returned. 
For example: + + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XP + No match, mark = B + + Note that in this unanchored example the mark is retained from the + match attempt that started at the letter "X" in the subject. Subsequent + match attempts starting at "P" and then with an empty string do not get + as far as the (*MARK) item, but nevertheless do not reset it. + + If you are interested in (*MARK) values after failed matches, you + should probably either set the PCRE2_NO_START_OPTIMIZE option or call + pcre2_set_optimize() with a PCRE2_START_OPTIMIZE_OFF directive (see + above) to ensure that the match is always attempted. + + Verbs that act after backtracking + + The following verbs do nothing when they are encountered. Matching con- + tinues with what follows, but if there is a subsequent match failure, + causing a backtrack to the verb, a failure is forced. That is, back- + tracking cannot pass to the left of the verb. However, when one of + these verbs appears inside an atomic group or in an atomic lookaround + assertion that is true, its effect is confined to that group, because + once the group has been matched, there is never any backtracking into + it. Backtracking from beyond an atomic assertion or group ignores the + entire group, and seeks a preceding backtracking point. + + These verbs differ in exactly what kind of failure occurs when back- + tracking reaches them. The behaviour described below is what happens + when the verb is not in a subroutine or an assertion. Subsequent sec- + tions cover these special cases. + + (*COMMIT) or (*COMMIT:NAME) + + This verb causes the whole match to fail outright if there is a later + matching failure that causes backtracking to reach it. Even if the pat- + tern is unanchored, no further attempts to find a match by advancing + the starting point take place. 
If (*COMMIT) is the only backtracking + verb that is encountered, once it has been passed pcre2_match() is com- + mitted to finding a match at the current starting point, or not at all. + For example: + + a+(*COMMIT)b + + This matches "xxaab" but not "aacaab". It can be thought of as a kind + of dynamic anchor, or "I've started, so I must finish." + + The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COM- + MIT). It is like (*MARK:NAME) in that the name is remembered for pass- + ing back to the caller. However, (*SKIP:NAME) searches only for names + that are set with (*MARK), ignoring those set by any of the other back- + tracking verbs. + + If there is more than one backtracking verb in a pattern, a different + one that follows (*COMMIT) may be triggered first, so merely passing + (*COMMIT) during a match does not always guarantee that a match must be + at this starting point. + + Note that (*COMMIT) at the start of a pattern is not the same as an an- + chor, unless PCRE2's start-of-match optimizations are turned off, as + shown in this output from pcre2test: + + re> /(*COMMIT)abc/ + data> xyzabc + 0: abc + data> + re> /(*COMMIT)abc/no_start_optimize + data> xyzabc + No match + + For the first pattern, PCRE2 knows that any match must start with "a", + so the optimization skips along the subject to "a" before applying the + pattern to the first set of data. The match attempt then succeeds. The + second pattern disables the optimization that skips along to the first + character. The pattern is now applied starting at "x", and so the + (*COMMIT) causes the match to fail without trying any other starting + points. + + (*PRUNE) or (*PRUNE:NAME) + + This verb causes the match to fail at the current starting position in + the subject if there is a later matching failure that causes backtrack- + ing to reach it. If the pattern is unanchored, the normal "bumpalong" + advance to the next starting character then happens. 
Backtracking can + occur as usual to the left of (*PRUNE), before it is reached, or when + matching to the right of (*PRUNE), but if there is no match to the + right, backtracking cannot cross (*PRUNE). In simple cases, the use of + (*PRUNE) is just an alternative to an atomic group or possessive quan- + tifier, but there are some uses of (*PRUNE) that cannot be expressed in + any other way. In an anchored pattern (*PRUNE) has the same effect as + (*COMMIT). + + The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). + It is like (*MARK:NAME) in that the name is remembered for passing back + to the caller. However, (*SKIP:NAME) searches only for names set with + (*MARK), ignoring those set by other backtracking verbs. + + (*SKIP) + + This verb, when given without a name, is like (*PRUNE), except that if + the pattern is unanchored, the "bumpalong" advance is not to the next + character, but to the position in the subject where (*SKIP) was encoun- + tered. (*SKIP) signifies that whatever text was matched leading up to + it cannot be part of a successful match if there is a later mismatch. + Consider: + + a+(*SKIP)b + + If the subject is "aaaac...", after the first match attempt fails + (starting at the first character in the string), the starting point + skips on to start the next attempt at "c". Note that a possessive quan- + tifier does not have the same effect as this example; although it would + suppress backtracking during the first match attempt, the second at- + tempt would start at the second character instead of skipping on to + "c". + + If (*SKIP) is used to specify a new starting position that is the same + as the starting position of the current match, or (by being inside a + lookbehind) earlier, the position specified by (*SKIP) is ignored, and + instead the normal "bumpalong" occurs. + + (*SKIP:NAME) + + When (*SKIP) has an associated name, its behaviour is modified. 
When + such a (*SKIP) is triggered, the previous path through the pattern is + searched for the most recent (*MARK) that has the same name. If one is + found, the "bumpalong" advance is to the subject position that corre- + sponds to that (*MARK) instead of to where (*SKIP) was encountered. If + no (*MARK) with a matching name is found, the (*SKIP) is ignored. + + The search for a (*MARK) name uses the normal backtracking mechanism, + which means that it does not see (*MARK) settings that are inside + atomic groups or assertions, because they are never re-entered by back- + tracking. Compare the following pcre2test examples: + + re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/ + data: abc + 0: a + 1: a + data: + re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/ + data: abc + 0: b + 1: b + + In the first example, the (*MARK) setting is in an atomic group, so it + is not seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. + This allows the second branch of the pattern to be tried at the first + character position. In the second example, the (*MARK) setting is not + in an atomic group. This allows (*SKIP:X) to find the (*MARK) when it + backtracks, and this causes a new matching attempt to start at the sec- + ond character. This time, the (*MARK) is never seen because "a" does + not match "b", so the matcher immediately jumps to the second branch of + the pattern. + + Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It + ignores names that are set by other backtracking verbs. + + (*THEN) or (*THEN:NAME) + + This verb causes a skip to the next innermost alternative when back- + tracking reaches it. That is, it cancels any further backtracking + within the current alternative. Its name comes from the observation + that it can be used for a pattern-based if-then-else block: + + ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ... 
+ + If the COND1 pattern matches, FOO is tried (and possibly further items + after the end of the group if FOO succeeds); on failure, the matcher + skips to the second alternative and tries COND2, without backtracking + into COND1. If that succeeds and BAR fails, COND3 is tried. If subse- + quently BAZ fails, there are no more alternatives, so there is a back- + track to whatever came before the entire group. If (*THEN) is not in- + side an alternation, it acts like (*PRUNE). + + The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). + It is like (*MARK:NAME) in that the name is remembered for passing back + to the caller. However, (*SKIP:NAME) searches only for names set with + (*MARK), ignoring those set by other backtracking verbs. + + A group that does not contain a | character is just a part of the en- + closing alternative; it is not a nested alternation with only one al- + ternative. The effect of (*THEN) extends beyond such a group to the en- + closing alternative. Consider this pattern, where A, B, etc. are com- + plex pattern fragments that do not contain any | characters at this + level: + + A (B(*THEN)C) | D + + If A and B are matched, but there is a failure in C, matching does not + backtrack into A; instead it moves to the next alternative, that is, D. + However, if the group containing (*THEN) is given an alternative, it + behaves differently: + + A (B(*THEN)C | (*FAIL)) | D + + The effect of (*THEN) is now confined to the inner group. After a fail- + ure in C, matching moves to (*FAIL), which causes the whole group to + fail because there are no more alternatives to try. In this case, + matching does backtrack into A. + + Note that a conditional group is not considered as having two alterna- + tives, because only one is ever used. In other words, the | character + in a conditional group has a different meaning. Ignoring white space, + consider: + + ^.*? (?(?=a) a | b(*THEN)c ) + + If the subject is "ba", this pattern does not match. 
Because .*? is un- + greedy, it initially matches zero characters. The condition (?=a) then + fails, the character "b" is matched, but "c" is not. At this point, + matching does not backtrack to .*? as might perhaps be expected from + the presence of the | character. The conditional group is part of the + single alternative that comprises the whole pattern, and so the match + fails. (If there was a backtrack into .*?, allowing it to match "b", + the match would succeed.) + + The verbs just described provide four different "strengths" of control + when subsequent matching fails. (*THEN) is the weakest, carrying on the + match at the next alternative. (*PRUNE) comes next, failing the match + at the current starting position, but allowing an advance to the next + character (for an unanchored pattern). (*SKIP) is similar, except that + the advance may be more than one character. (*COMMIT) is the strongest, + causing the entire match to fail. + + More than one backtracking verb + + If more than one backtracking verb is present in a pattern, the one + that is backtracked onto first acts. For example, consider this pat- + tern, where A, B, etc. are complex pattern fragments: + + (A(*COMMIT)B(*THEN)C|ABD) + + If A matches but B fails, the backtrack to (*COMMIT) causes the entire + match to fail. However, if A and B match, but C fails, the backtrack to + (*THEN) causes the next alternative (ABD) to be tried. This behaviour + is consistent, but is not always the same as Perl's. It means that if + two or more backtracking verbs appear in succession, all but the last + of them have no effect. Consider this example: + + ...(*COMMIT)(*PRUNE)... + + If there is a matching failure to the right, backtracking onto (*PRUNE) + causes it to be triggered, and its action is taken. There can never be + a backtrack onto (*COMMIT). + + Backtracking verbs in repeated groups + + PCRE2 sometimes differs from Perl in its handling of backtracking verbs + in repeated groups. 
For example, consider: + + /(a(*COMMIT)b)+ac/ + + If the subject is "abac", Perl matches unless its optimizations are + disabled, but PCRE2 always fails because the (*COMMIT) in the second + repeat of the group acts. + + Backtracking verbs in assertions + + (*FAIL) in any assertion has its normal effect: it forces an immediate + backtrack. The behaviour of the other backtracking verbs depends on + whether or not the assertion is standalone or acting as the condition + in a conditional group. + + (*ACCEPT) in a standalone positive assertion causes the assertion to + succeed without any further processing; captured strings and a mark + name (if set) are retained. In a standalone negative assertion, (*AC- + CEPT) causes the assertion to fail without any further processing; cap- + tured substrings and any mark name are discarded. + + If the assertion is a condition, (*ACCEPT) causes the condition to be + true for a positive assertion and false for a negative one; captured + substrings are retained in both cases. + + The remaining verbs act only when a later failure causes a backtrack to + reach them. This means that, for the Perl-compatible assertions, their + effect is confined to the assertion, because Perl lookaround assertions + are atomic. A backtrack that occurs after such an assertion is complete + does not jump back into the assertion. Note in particular that a + (*MARK) name that is set in an assertion is not "seen" by an instance + of (*SKIP:NAME) later in the pattern. + + PCRE2 now supports non-atomic positive assertions and also "scan sub- + string" assertions, as described in the sections entitled "Non-atomic + assertions" and "Scan substring assertions" above. These assertions + must be standalone (not used as conditions). They are not Perl-compati- + ble. For these assertions, a later backtrack does jump back into the + assertion, and therefore verbs such as (*COMMIT) can be triggered by + backtracks from later in the pattern. 
+ + The effect of (*THEN) is not allowed to escape beyond an assertion. If + there are no more branches to try, (*THEN) causes a positive assertion + to be false, and a negative assertion to be true. This behaviour dif- + fers from Perl when the assertion has only one branch. + + The other backtracking verbs are not treated specially if they appear + in a standalone positive assertion. In a conditional positive asser- + tion, backtracking (from within the assertion) into (*COMMIT), (*SKIP), + or (*PRUNE) causes the condition to be false. However, for both stand- + alone and conditional negative assertions, backtracking into (*COMMIT), + (*SKIP), or (*PRUNE) causes the assertion to be true, without consider- + ing any further alternative branches. + + Backtracking verbs in subroutines + + These behaviours occur whether or not the group is called recursively. + + (*ACCEPT) in a group called as a subroutine causes the subroutine match + to succeed without any further processing. Matching then continues af- + ter the subroutine call. Perl documents this behaviour. Perl's treat- + ment of the other verbs in subroutines is different in some cases. + + (*FAIL) in a group called as a subroutine has its normal effect: it + forces an immediate backtrack. + + (*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail + when triggered by being backtracked to in a group called as a subrou- + tine. There is then a backtrack at the outer level. + + (*THEN), when triggered, skips to the next alternative in the innermost + enclosing group that has alternatives (its normal behaviour). However, + if there is no such group within the subroutine's group, the subroutine + match fails and there is a backtrack at the outer level. + + +EBCDIC ENVIRONMENTS + + Differences in the way PCRE2 behaves when it is running in an EBCDIC en- + vironment are covered in this section. + + Escape sequences + + When PCRE2 is compiled in EBCDIC mode, \N{U+hhh..} is not supported. 
+ \a, \e, \f, \n, \r, and \t generate the appropriate EBCDIC code values. + The \c escape is processed as specified for Perl in the perlebcdic doc- + ument. The only characters that are allowed after \c are A-Z, a-z, or + one of @, [, \, ], ^, _, or ?. Any other character provokes a compile- + time error. The sequence \c@ encodes character code 0; after \c the + letters (in either case) encode characters 1-26 (hex 01 to hex 1A); [, + \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and \c? be- + comes either 255 (hex FF) or 95 (hex 5F). + + Thus, apart from \c?, these escapes generate the same character code + values as they do in an ASCII or Unicode environment, though the mean- + ings of the values mostly differ. For example, \cG always generates + code value 7, which is BEL in ASCII but DEL in EBCDIC. + + The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, + but because 127 is not a control character in EBCDIC, Perl makes it + generate the APC character. Unfortunately, there are several variants + of EBCDIC. In most of them the APC character has the value 255 (hex + FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If + certain other characters have POSIX-BC values, PCRE2 makes \c? generate + 95; otherwise it generates 255. + + Character classes + + In character classes there is a special case in EBCDIC environments for + ranges whose end points are both specified as literal letters in the + same case. For compatibility with Perl, EBCDIC code points within the + range that are not letters are omitted. For example, [h-k] matches only + four characters, even though the EBCDIC codes for h and k are 0x88 and + 0x92, a range of 11 code points. However, if the range is specified nu- + merically, for example, [\x88-\x92] or [h-\x92], all code points are + included. + + +SEE ALSO + + pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2syntax(3), + pcre2(3). 
+ + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 27 November 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 27 November 2024 PCRE2PATTERN(3) +------------------------------------------------------------------------------ + + +PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 PERFORMANCE + + Two aspects of performance are discussed below: memory usage and pro- + cessing time. The way you express your pattern as a regular expression + can affect both of them. + + +COMPILED PATTERN MEMORY USAGE + + Patterns are compiled by PCRE2 into a reasonably efficient interpretive + code, so that most simple patterns do not use much memory for storing + the compiled version. However, there is one case where the memory usage + of a compiled pattern can be unexpectedly large. If a parenthesized + group has a quantifier with a minimum greater than 1 and/or a limited + maximum, the whole group is repeated in the compiled code. For example, + the pattern + + (abc|def){2,4} + + is compiled as if it were + + (abc|def)(abc|def)((abc|def)(abc|def)?)? + + (Technical aside: It is done this way so that backtrack points within + each of the repetitions can be independently maintained.) + + For regular expressions whose quantifiers use only small numbers, this + is not usually a problem. However, if the numbers are large, and par- + ticularly if such repetitions are nested, the memory usage can become + an embarrassment. For example, the very simple pattern + + ((ab){1,1000}c){1,3} + + uses over 50KiB when compiled using the 8-bit library. When PCRE2 is + compiled with its default internal pointer size of two bytes, the size + limit on a compiled pattern is 65535 code units in the 8-bit and 16-bit + libraries, and this is reached with the above pattern if the outer rep- + etition is increased from 3 to 4. 
PCRE2 can be compiled to use larger + internal pointers and thus handle larger compiled patterns, but it is + better to try to rewrite your pattern to use less memory if you can. + + One way of reducing the memory usage for such patterns is to make use + of PCRE2's "subroutine" facility. Re-writing the above pattern as + + ((ab)(?2){0,999}c)(?1){0,2} + + reduces the memory requirements to around 16KiB, and indeed it remains + under 20KiB even with the outer repetition increased to 100. However, + this kind of pattern is not always exactly equivalent, because any cap- + tures within subroutine calls are lost when the subroutine completes. + If this is not a problem, this kind of rewriting will allow you to + process patterns that PCRE2 cannot otherwise handle. The matching per- + formance of the two different versions of the pattern is roughly the + same. (This applies from release 10.30 - things were different in ear- + lier releases.) + + +STACK AND HEAP USAGE AT RUN TIME + + From release 10.30, the interpretive (non-JIT) version of pcre2_match() + uses very little system stack at run time. In earlier releases recur- + sive function calls could use a great deal of stack, and this could + cause problems, but this usage has been eliminated. Backtracking posi- + tions are now explicitly remembered in memory frames controlled by the + code. + + The size of each frame depends on the size of pointer variables and the + number of capturing parenthesized groups in the pattern being matched. + On a 64-bit system the frame size for a pattern with no captures is 128 + bytes. For each capturing group the size increases by 16 bytes. + + Until release 10.41, an initial 20KiB frames vector was allocated on + the system stack, but this still caused some issues for multi-thread + applications where each thread has a very small stack. From release + 10.41 backtracking memory frames are always held in heap memory. 
An + initial heap allocation is obtained the first time any match data block + is passed to pcre2_match(). This is remembered with the match data + block and re-used if that block is used for another match. It is freed + when the match data block itself is freed. + + The size of the initial block is the larger of 20KiB or ten times the + pattern's frame size, unless the heap limit is less than this, in which + case the heap limit is used. If the initial block proves to be too + small during matching, it is replaced by a larger block, subject to the + heap limit. The heap limit is checked only when a new block is to be + allocated. Reducing the heap limit between calls to pcre2_match() with + the same match data block does not affect the saved block. + + In contrast to pcre2_match(), pcre2_dfa_match() does use recursive + function calls, but only for processing atomic groups, lookaround as- + sertions, and recursion within the pattern. The original version of the + code used to allocate quite large internal workspace vectors on the + stack, which caused some problems for some patterns in environments + with small stacks. From release 10.32 the code for pcre2_dfa_match() + has been re-factored to use heap memory when necessary for internal + workspace when recursing, though recursive function calls are still + used. + + The "match depth" parameter can be used to limit the depth of function + recursion, and the "match heap" parameter to limit heap memory in + pcre2_dfa_match(). + + +PROCESSING TIME + + Certain items in regular expression patterns are processed more effi- + ciently than others. It is more efficient to use a character class like + [aeiou] than a set of single-character alternatives such as + (a|e|i|o|u). In general, the simplest construction that provides the + required behaviour is usually the most efficient. Jeffrey Friedl's book + contains a lot of useful general discussion about optimizing regular + expressions for efficient performance. 
This document contains a few ob- + servations about PCRE2. + + Using Unicode character properties (the \p, \P, and \X escapes) is + slow, because PCRE2 has to use a multi-stage table lookup whenever it + needs a character's property. If you can find an alternative pattern + that does not use character properties, it will probably be faster. + + By default, the escape sequences \b, \d, \s, and \w, and the POSIX + character classes such as [:alpha:] do not use Unicode properties, + partly for backwards compatibility, and partly for performance reasons. + However, you can set the PCRE2_UCP option or start the pattern with + (*UCP) if you want Unicode character properties to be used. This can + double the matching time for items such as \d, when matched with + pcre2_match(); the performance loss is less with a DFA matching func- + tion, and in both cases there is not much difference for \b. + + When a pattern begins with .* not in atomic parentheses, nor in paren- + theses that are the subject of a backreference, and the PCRE2_DOTALL + option is set, the pattern is implicitly anchored by PCRE2, since it + can match only at the start of a subject string. If the pattern has + multiple top-level branches, they must all be anchorable. The optimiza- + tion can be disabled by the PCRE2_NO_DOTSTAR_ANCHOR option, and is au- + tomatically disabled if the pattern contains (*PRUNE) or (*SKIP). + + If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, be- + cause the dot metacharacter does not then match a newline, and if the + subject string contains newlines, the pattern may match from the char- + acter immediately following one of them instead of from the very start. + For example, the pattern + + .*second + + matches the subject "first\nand second" (where \n stands for a newline + character), with the match starting at the seventh character. In order + to do this, PCRE2 has to retry the match starting after every newline + in the subject. 
+ + If you are using such a pattern with subject strings that do not con- + tain newlines, the best performance is obtained by setting + PCRE2_DOTALL, or starting the pattern with ^.* or ^.*? to indicate ex- + plicit anchoring. That saves PCRE2 from having to scan along the sub- + ject looking for a newline to restart at. + + Beware of patterns that contain nested indefinite repeats. These can + take a long time to run when applied to a string that does not match. + Consider the pattern fragment + + ^(a+)* + + This can match "aaaa" in 16 different ways, and this number increases + very rapidly as the string gets longer. (The * repeat can match 0, 1, + 2, 3, or 4 times, and for each of those cases other than 0 or 4, the + + repeats can match different numbers of times.) When the remainder of + the pattern is such that the entire match is going to fail, PCRE2 has + in principle to try every possible variation, and this can take an ex- + tremely long time, even for relatively short strings. + + An optimization catches some of the more simple cases such as + + (a+)*b + + where a literal character follows. Before embarking on the standard + matching procedure, PCRE2 checks that there is a "b" later in the sub- + ject string, and if there is not, it fails the match immediately. How- + ever, when there is no following literal this optimization cannot be + used. You can see the difference by comparing the behaviour of + + (a+)*\d + + with the pattern above. The former gives a failure almost instantly + when applied to a whole line of "a" characters, whereas the latter + takes an appreciable time with strings longer than about 20 characters. + + In many cases, the solution to this kind of performance issue is to use + an atomic group or a possessive quantifier. This can often reduce mem- + ory requirements as well. 
As another example, consider this pattern: + + ([^<]|<(?!inet))+ + + It matches from wherever it starts until it encounters "<inet" or the + end of the data. + + +------------------------------------------------------------------------------ + + +PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SYNOPSIS + + #include <pcre2posix.h> + + int pcre2_regcomp(regex_t *preg, const char *pattern, + int cflags); + + int pcre2_regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + + size_t pcre2_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); + + void pcre2_regfree(regex_t *preg); + + +DESCRIPTION + + This set of functions provides a POSIX-style API for the PCRE2 regular + expression 8-bit library. There are no POSIX-style wrappers for PCRE2's + 16-bit and 32-bit libraries. See the pcre2api documentation for a de- + scription of PCRE2's native API, which contains much additional func- + tionality. + + IMPORTANT NOTE: The functions described here are NOT thread-safe, and + should not be used in multi-threaded applications. They are also lim- + ited to processing subjects that are not bigger than 2GB. Use the na- + tive API instead. + + These functions are wrapper functions that ultimately call the PCRE2 + native API. Their prototypes are defined in the pcre2posix.h header + file, and they all have unique names starting with pcre2_. However, the + pcre2posix.h header also contains macro definitions that convert the + standard POSIX names such as regcomp() into pcre2_regcomp() etc. This + means that a program can use the usual POSIX names without running the + risk of accidentally linking with POSIX functions from a different li- + brary. + + On Unix-like systems the PCRE2 POSIX library is called libpcre2-posix, + so can be accessed by adding -lpcre2-posix to the command for linking + an application. Because the POSIX functions call the native ones, it is + also necessary to add -lpcre2-8. 
+ + On Windows systems, if you are linking to a DLL version of the library, + it is recommended that PCRE2POSIX_SHARED is defined before including + the pcre2posix.h header, as it will allow for a more efficient way to + invoke the functions by adding the __declspec(dllimport) decorator. + + Although they were not defined as prototypes in pcre2posix.h, releases + 10.33 to 10.36 of the library contained functions with the POSIX names + regcomp() etc. These simply passed their arguments to the PCRE2 func- + tions. These functions were provided for backwards compatibility with + earlier versions of PCRE2, which had only POSIX names. However, this + has proved troublesome in situations where a program links with several + libraries, some of which use PCRE2's POSIX interface while others use + the real POSIX functions. For this reason, the POSIX names have been + removed since release 10.37. + + Calling the header file pcre2posix.h avoids any conflict with other + POSIX libraries. It can, of course, be renamed or aliased as regex.h, + which is the "correct" name, if there is no clash. It provides two + structure types, regex_t for compiled internal forms, and regmatch_t + for returning captured substrings. It also defines some constants whose + names start with "REG_"; these are used for setting options and identi- + fying error codes. + + +USING THE POSIX FUNCTIONS + + Note that these functions are just POSIX-style wrappers for PCRE2's na- + tive API. They do not give POSIX regular expression behaviour, and + they are not thread-safe or even POSIX compatible. + + Those POSIX option bits that can reasonably be mapped to PCRE2 native + options have been implemented. In addition, the option REG_EXTENDED is + defined with the value zero. This has no effect, but since programs + that are written to the POSIX interface often use it, this makes it + easier to slot in PCRE2 as a replacement library. Other POSIX options + are not even defined. 
+ + There are also some options that are not defined by POSIX. These have + been added at the request of users who want to make use of certain + PCRE2-specific features via the POSIX calling interface or to add BSD + or GNU functionality. + + When PCRE2 is called via these functions, it is only the API that is + POSIX-like in style. The syntax and semantics of the regular expres- + sions themselves are still those of Perl, subject to the setting of + various PCRE2 options, as described below. "POSIX-like in style" means + that the API approximates to the POSIX definition; it is not fully + POSIX-compatible, and in multi-unit encoding domains it is probably + even less compatible. + + The descriptions below use the actual names of the functions, but, as + described above, the standard POSIX names (without the pcre2_ prefix) + may also be used. + + +COMPILING A PATTERN + + The function pcre2_regcomp() is called to compile a pattern into an in- + ternal form. By default, the pattern is a C string terminated by a bi- + nary zero (but see REG_PEND below). The preg argument is a pointer to a + regex_t structure that is used as a base for storing information about + the compiled regular expression. It is also used for input when + REG_PEND is set. The regex_t structure used by pcre2_regcomp() is de- + fined in pcre2posix.h and is not the same as the structure used by + other libraries that provide POSIX-style matching. + + The argument cflags is either zero, or contains one or more of the bits + defined by the following macros: + + REG_DOTALL + + The PCRE2_DOTALL option is set when the regular expression is passed + for compilation to the native function. Note that REG_DOTALL is not + part of the POSIX standard. + + REG_ICASE + + The PCRE2_CASELESS option is set when the regular expression is passed + for compilation to the native function. + + REG_NEWLINE + + The PCRE2_MULTILINE option is set when the regular expression is passed + for compilation to the native function. 
Note that this does not mimic + the defined POSIX behaviour for REG_NEWLINE (see the following sec- + tion). + + REG_NOSPEC + + The PCRE2_LITERAL option is set when the regular expression is passed + for compilation to the native function. This disables all meta charac- + ters in the pattern, causing it to be treated as a literal string. The + only other options that are allowed with REG_NOSPEC are REG_ICASE, + REG_NOSUB, REG_PEND, and REG_UTF. Note that REG_NOSPEC is not part of + the POSIX standard. + + REG_NOSUB + + When a pattern that is compiled with this flag is passed to + pcre2_regexec() for matching, the nmatch and pmatch arguments are ig- + nored, and no captured strings are returned. Versions of the PCRE2 li- + brary prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile op- + tion, but this no longer happens because it disables the use of back- + references. + + REG_PEND + + If this option is set, the reg_endp field in the preg structure (which + has the type const char *) must be set to point to the character beyond + the end of the pattern before calling pcre2_regcomp(). The pattern it- + self may now contain binary zeros, which are treated as data charac- + ters. Without REG_PEND, a binary zero terminates the pattern and the + re_endp field is ignored. This is a GNU extension to the POSIX standard + and should be used with caution in software intended to be portable to + other systems. + + REG_UCP + + The PCRE2_UCP option is set when the regular expression is passed for + compilation to the native function. This causes PCRE2 to use Unicode + properties when matching \d, \w, etc., instead of just recognizing + ASCII values. Note that REG_UCP is not part of the POSIX standard. + + REG_UNGREEDY + + The PCRE2_UNGREEDY option is set when the regular expression is passed + for compilation to the native function. Note that REG_UNGREEDY is not + part of the POSIX standard. 
+ + REG_UTF + + The PCRE2_UTF option is set when the regular expression is passed for + compilation to the native function. This causes the pattern itself and + all data strings used for matching it to be treated as UTF-8 strings. + Note that REG_UTF is not part of the POSIX standard. + + In the absence of these flags, no options are passed to the native + function. This means that the regex is compiled with PCRE2 default se- + mantics. In particular, the way it handles newline characters in the + subject string is the Perl way, not the POSIX way. Note that setting + PCRE2_MULTILINE has only some of the effects specified for REG_NEWLINE. + It does not affect the way newlines are matched by the dot metacharac- + ter (they are not) or by a negative class such as [^a] (they are). + + The yield of pcre2_regcomp() is zero on success, and non-zero other- + wise. The preg structure is filled in on success, and one other member + of the structure (as well as re_endp) is public: re_nsub contains the + number of capturing subpatterns in the regular expression. Various er- + ror codes are defined in the header file. + + NOTE: If the yield of pcre2_regcomp() is non-zero, you must not attempt + to use the contents of the preg structure. If, for example, you pass it + to pcre2_regexec(), the result is undefined and your program is likely + to crash. + + +MATCHING NEWLINE CHARACTERS + + This area is not simple, because POSIX and Perl take different views of + things. It is not possible to get PCRE2 to obey POSIX semantics, but + then PCRE2 was never intended to be a POSIX engine. The following table + lists the different possibilities for matching newline characters in + Perl and PCRE2: + + Default Change with + + . 
matches newline no PCRE2_DOTALL + newline matches [^a] yes not changeable + $ matches \n at end yes PCRE2_DOLLAR_ENDONLY + $ matches \n in middle no PCRE2_MULTILINE + ^ matches \n in middle no PCRE2_MULTILINE + + This is the equivalent table for a POSIX-compatible pattern matcher: + + Default Change with + + . matches newline yes REG_NEWLINE + newline matches [^a] yes REG_NEWLINE + $ matches \n at end no REG_NEWLINE + $ matches \n in middle no REG_NEWLINE + ^ matches \n in middle no REG_NEWLINE + + This behaviour is not what happens when PCRE2 is called via its POSIX + API. By default, PCRE2's behaviour is the same as Perl's, except that + there is no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 + and Perl, there is no way to stop newline from matching [^a]. + + Default POSIX newline handling can be obtained by setting PCRE2_DOTALL + and PCRE2_DOLLAR_ENDONLY when calling pcre2_compile() directly, but + there is no way to make PCRE2 behave exactly as for the REG_NEWLINE ac- + tion. When using the POSIX API, passing REG_NEWLINE to PCRE2's + pcre2_regcomp() function causes PCRE2_MULTILINE to be passed to + pcre2_compile(), and REG_DOTALL passes PCRE2_DOTALL. There is no way to + pass PCRE2_DOLLAR_ENDONLY. + + +MATCHING A PATTERN + + The function pcre2_regexec() is called to match a compiled pattern preg + against a given string, which is by default terminated by a zero byte + (but see REG_STARTEND below), subject to the options in eflags. These + can be: + + REG_NOTBOL + + The PCRE2_NOTBOL option is set when calling the underlying PCRE2 match- + ing function. + + REG_NOTEMPTY + + The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 + matching function. Note that REG_NOTEMPTY is not part of the POSIX + standard. However, setting this option can give more POSIX-like behav- + iour in some situations. + + REG_NOTEOL + + The PCRE2_NOTEOL option is set when calling the underlying PCRE2 match- + ing function. 
+ + REG_STARTEND + + When this option is set, the subject string starts at string + + pmatch[0].rm_so and ends at string + pmatch[0].rm_eo, which should + point to the first character beyond the string. There may be binary ze- + ros within the subject string, and indeed, using REG_STARTEND is the + only way to pass a subject string that contains a binary zero. + + Whatever the value of pmatch[0].rm_so, the offsets of the matched + string and any captured substrings are still given relative to the + start of string itself. (Before PCRE2 release 10.30 these were given + relative to string + pmatch[0].rm_so, but this differs from other im- + plementations.) + + This is a BSD extension, compatible with but not specified by IEEE + Standard 1003.2 (POSIX.2), and should be used with caution in software + intended to be portable to other systems. Note that a non-zero rm_so + does not imply REG_NOTBOL; REG_STARTEND affects only the location and + length of the string, not how it is matched. Setting REG_STARTEND and + passing pmatch as NULL are mutually exclusive; the error REG_INVARG is + returned. + + If the pattern was compiled with the REG_NOSUB flag, no data about any + matched strings is returned. The nmatch and pmatch arguments of + pcre2_regexec() are ignored (except possibly as input for REG_STAR- + TEND). + + The value of nmatch may be zero, and the value pmatch may be NULL (un- + less REG_STARTEND is set); in both these cases no data about any + matched strings is returned. + + Otherwise, the portion of the string that was matched, and also any + captured substrings, are returned via the pmatch argument, which points + to an array of nmatch structures of type regmatch_t, containing the + members rm_so and rm_eo. These contain the byte offset to the first + character of each substring and the offset to the first character after + the end of each substring, respectively. 
The 0th element of the vector + relates to the entire portion of string that was matched; subsequent + elements relate to the capturing subpatterns of the regular expression. + Unused entries in the array have both structure members set to -1. + + regmatch_t as well as the regoff_t typedef it uses are defined in + pcre2posix.h and are not warranted to have the same size or layout as + other similarly named types from other libraries that provide POSIX- + style matching. + + A successful match yields a zero return; various error codes are de- + fined in the header file, of which REG_NOMATCH is the "expected" fail- + ure code. + + +ERROR MESSAGES + + The pcre2_regerror() function maps a non-zero errorcode from either + pcre2_regcomp() or pcre2_regexec() to a printable message. If preg is + not NULL, the error should have arisen from the use of that structure. + A message terminated by a binary zero is placed in errbuf. If the + buffer is too short, only the first errbuf_size - 1 characters of the + error message are used. The yield of the function is the size of buffer + needed to hold the whole message, including the terminating zero. This + value is greater than errbuf_size if the message was truncated. + + +MEMORY USAGE + + Compiling a regular expression causes memory to be allocated and asso- + ciated with the preg structure. The function pcre2_regfree() frees all + such memory, after which preg may no longer be used as a compiled ex- + pression. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 27 November 2024 + Copyright (c) 1997-2024 University of Cambridge. 
+ + +PCRE2 10.45 27 November 2024 PCRE2POSIX(3) +------------------------------------------------------------------------------ + + +PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 SAMPLE PROGRAM + + A simple, complete demonstration program to get you started with using + PCRE2 is supplied in the file pcre2demo.c in the src directory in the + PCRE2 distribution. A listing of this program is given in the pcre2demo + documentation. If you do not have a copy of the PCRE2 distribution, you + can save this listing to re-create the contents of pcre2demo.c. + + The demonstration program compiles the regular expression that is its + first argument, and matches it against the subject string in its second + argument. No PCRE2 options are set, and default character tables are + used. If matching succeeds, the program outputs the portion of the sub- + ject that matched, together with the contents of any captured sub- + strings. + + If the -g option is given on the command line, the program then goes on + to check for further matches of the same regular expression in the same + subject string. The logic is a little bit tricky because of the possi- + bility of matching an empty string. Comments in the code explain what + is going on. + + The code in pcre2demo.c is an 8-bit program that uses the PCRE2 8-bit + library. It handles strings and characters that are stored in 8-bit + code units. By default, one character corresponds to one code unit, + but if the pattern starts with "(*UTF)", both it and the subject are + treated as UTF-8 strings, where characters may occupy multiple code + units. 
+ + If PCRE2 is installed in the standard include and library directories + for your operating system, you should be able to compile the demonstra- + tion program using a command like this: + + cc -o pcre2demo pcre2demo.c -lpcre2-8 + + If PCRE2 is installed elsewhere, you may need to add additional options + to the command line. For example, on a Unix-like system that has PCRE2 + installed in /usr/local, you can compile the demonstration program us- + ing a command like this: + + cc -o pcre2demo -I/usr/local/include pcre2demo.c \ + -L/usr/local/lib -lpcre2-8 + + Once you have built the demonstration program, you can run simple tests + like this: + + ./pcre2demo 'cat|dog' 'the cat sat on the mat' + ./pcre2demo -g 'cat|dog' 'the dog sat on the cat' + + Note that there is a much more comprehensive test program, called + pcre2test, which supports many more facilities for testing regular ex- + pressions using all three PCRE2 libraries (8-bit, 16-bit, and 32-bit, + though not all three need be installed). The pcre2demo program is pro- + vided as a relatively simple coding example. + + If you try to run pcre2demo when PCRE2 is not installed in the standard + library directory, you may get an error like this on some operating + systems (e.g. Solaris): + + ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file + or directory + + This is caused by the way shared library support works on those sys- + tems. You need to add + + -R/usr/local/lib + + (for example) to the compile command to get round this problem. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 14 November 2023 + Copyright (c) 1997-2016 University of Cambridge. 
+ + +PCRE2 10.45 14 November 2023 PCRE2SAMPLE(3) +------------------------------------------------------------------------------ +PCRE2SERIALIZE(3) Library Functions Manual PCRE2SERIALIZE(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS + + int32_t pcre2_serialize_decode(pcre2_code **codes, + int32_t number_of_codes, const uint8_t *bytes, + pcre2_general_context *gcontext); + + int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, + PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); + + void pcre2_serialize_free(uint8_t *bytes); + + int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes); + + If you are running an application that uses a large number of regular + expression patterns, it may be useful to store them in a precompiled + form instead of having to compile them every time the application is + run. However, if you are using the just-in-time optimization feature, + it is not possible to save and reload the JIT data, because it is posi- + tion-dependent. The host on which the patterns are reloaded must be + running the same version of PCRE2, with the same code unit width, and + must also have the same endianness, pointer width and PCRE2_SIZE type. + For example, patterns compiled on a 32-bit system using PCRE2's 16-bit + library cannot be reloaded on a 64-bit system, nor can they be reloaded + using the 8-bit library. + + Note that "serialization" in PCRE2 does not convert compiled patterns + to an abstract format like Java or .NET serialization. The serialized + output is really just a bytecode dump, which is why it can only be re- + loaded in the same environment as the one that created it. Hence the + restrictions mentioned above. 
Applications that are not statically + linked with a fixed version of PCRE2 must be prepared to recompile pat- + terns from their sources, in order to be immune to PCRE2 upgrades. + + +SECURITY CONCERNS + + The facility for saving and restoring compiled patterns is intended for + use within individual applications. As such, the data supplied to + pcre2_serialize_decode() is expected to be trusted data, not data from + arbitrary external sources. There is only some simple consistency + checking, not complete validation of what is being re-loaded. Corrupted + data may cause undefined results. For example, if the length field of a + pattern in the serialized data is corrupted, the deserializing code may + read beyond the end of the byte stream that is passed to it. + + +SAVING COMPILED PATTERNS + + Before compiled patterns can be saved they must be serialized, which in + PCRE2 means converting the pattern to a stream of bytes. A single byte + stream may contain any number of compiled patterns, but they must all + use the same character tables. A single copy of the tables is included + in the byte stream (its size is 1088 bytes). For more details of char- + acter tables, see the section on locale support in the pcre2api docu- + mentation. + + The function pcre2_serialize_encode() creates a serialized byte stream + from a list of compiled patterns. Its first two arguments specify the + list, being a pointer to a vector of pointers to compiled patterns, and + the length of the vector. The third and fourth arguments point to vari- + ables which are set to point to the created byte stream and its length, + respectively. The final argument is a pointer to a general context, + which can be used to specify custom memory management functions. If + this argument is NULL, malloc() is used to obtain memory for the byte + stream. 
The yield of the function is the number of serialized patterns, + or one of the following negative error codes: + + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL + + PCRE2_ERROR_BADMAGIC means either that a pattern's code has been cor- + rupted, or that a slot in the vector does not point to a compiled pat- + tern. + + Once a set of patterns has been serialized you can save the data in any + appropriate manner. Here is sample code that compiles two patterns and + writes them to a file. It assumes that the variable fd refers to a file + that is open for output. The error checking that should be present in a + real application has been omitted for simplicity. + + int errorcode; + uint8_t *bytes; + PCRE2_SIZE erroroffset; + PCRE2_SIZE bytescount; + pcre2_code *list_of_codes[2]; + list_of_codes[0] = pcre2_compile("first pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + list_of_codes[1] = pcre2_compile("second pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes, + &bytescount, NULL); + errorcode = fwrite(bytes, 1, bytescount, fd); + + Note that the serialized data is binary data that may contain any of + the 256 possible byte values. On systems that make a distinction be- + tween binary and non-binary data, be sure that the file is opened for + binary output. + + Serializing a set of patterns leaves the original data untouched, so + they can still be used for matching. Their memory must eventually be + freed in the usual way by calling pcre2_code_free(). When you have fin- + ished with the byte stream, it too must be freed by calling pcre2_seri- + alize_free(). 
If this function is called with a NULL argument, it re- + turns immediately without doing anything. + + +RE-USING PRECOMPILED PATTERNS + + In order to re-use a set of saved patterns you must first make the se- + rialized byte stream available in main memory (for example, by reading + from a file). The management of this memory block is up to the applica- + tion. You can use the pcre2_serialize_get_number_of_codes() function to + find out how many compiled patterns are in the serialized data without + actually decoding the patterns: + + uint8_t *bytes = <serialized data>; + int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes); + + The pcre2_serialize_decode() function reads a byte stream and recreates + the compiled patterns in new memory blocks, setting pointers to them in + a vector. The first two arguments are a pointer to a suitable vector + and its length, and the third argument points to a byte stream. The fi- + nal argument is a pointer to a general context, which can be used to + specify custom memory management functions for the decoded patterns. If + this argument is NULL, malloc() and free() are used. After deserializa- + tion, the byte stream is no longer needed and can be discarded. + + pcre2_code *list_of_codes[2]; + uint8_t *bytes = <serialized data>; + int32_t number_of_codes = + pcre2_serialize_decode(list_of_codes, 2, bytes, NULL); + + If the vector is not large enough for all the patterns in the byte + stream, it is filled with those that fit, and the remainder are ig- + nored. 
The yield of the function is the number of decoded patterns, or + one of the following negative error codes: + + PCRE2_ERROR_BADDATA second argument is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data + PCRE2_ERROR_BADMODE mismatch of code unit size or PCRE2 version + PCRE2_ERROR_BADSERIALIZEDDATA other sanity check failure + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL first or third argument is NULL + + PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was + compiled on a system with different endianness. + + Decoded patterns can be used for matching in the usual way, and must be + freed by calling pcre2_code_free(). However, be aware that there is a + potential race issue if you are using multiple patterns that were de- + coded from a single byte stream in a multithreaded application. A sin- + gle copy of the character tables is used by all the decoded patterns + and a reference count is used to arrange for its memory to be automati- + cally freed when the last pattern is freed, but there is no locking on + this reference count. Therefore, if you want to call pcre2_code_free() + for these patterns in different threads, you must arrange your own + locking, and ensure that pcre2_code_free() cannot be called by two + threads at the same time. + + If a pattern was processed by pcre2_jit_compile() before being serial- + ized, the JIT data is discarded and so is no longer available after a + save/restore cycle. You can, however, process a restored pattern with + pcre2_jit_compile() if you wish. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 19 January 2024 + Copyright (c) 1997-2018 University of Cambridge. 
+ + +PCRE2 10.45 19 January 2024 PCRE2SERIALIZE(3) +------------------------------------------------------------------------------ + + +PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY + + The full syntax and semantics of the regular expression patterns that + are supported by PCRE2 are described in the pcre2pattern documentation. + This document contains a quick-reference summary of the pattern syntax + followed by the syntax of replacement strings in substitution function. + The full description of the latter is in the pcre2api documentation. + + +QUOTING + + \x where x is non-alphanumeric is a literal x + \Q...\E treat enclosed characters as literal + + Note that white space inside \Q...\E is always treated as literal, even + if PCRE2_EXTENDED is set, causing most other white space to be ignored. + Note also that PCRE2's handling of \Q...\E has some differences from + Perl's. See the pcre2pattern documentation for details. + + +BRACED ITEMS + + With one exception, wherever brace characters { and } are required to + enclose data for constructions such as \g{2} or \k{name}, space and/or + horizontal tab characters that follow { or precede } are allowed and + are ignored. In the case of quantifiers, they may also appear before or + after the comma. The exception is \u{...} which is not Perl-compatible + and is recognized only when PCRE2_EXTRA_ALT_BSUX is set. This is an EC- + MAScript compatibility feature, and follows ECMAScript's behaviour. + + +ESCAPED CHARACTERS + + This table applies to ASCII and Unicode environments. An unrecognized + escape sequence causes an error. 
+ + \a alarm, that is, the BEL character (hex 07) + \cx "control-x", where x is a non-control ASCII character + \e escape (hex 1B) + \f form feed (hex 0C) + \n newline (hex 0A) + \r carriage return (hex 0D) + \t tab (hex 09) + \0dd character with octal code 0dd + \ddd character with octal code ddd, or backreference + \o{ddd..} character with octal code ddd.. + \N{U+hh..} character with Unicode code point hh.. (Unicode mode only) + \xhh character with hex code hh + \x{hh..} character with hex code hh.. + + \N{U+hh..} is synonymous with \x{hh..} but is not supported in environ- + ments that use EBCDIC code (mainly IBM mainframes). Note that \N not + followed by an opening curly bracket has a different meaning (see be- + low). + + If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the + following are also recognized: + + \U the character "U" + \uhhhh character with hex code hhhh + \u{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX + + When \x is not followed by {, one or two hexadecimal digits are read, + but in ALT_BSUX mode \x must be followed by two hexadecimal digits to + be recognized as a hexadecimal escape; otherwise it matches a literal + "x". Likewise, if \u (in ALT_BSUX mode) is not followed by four hexa- + decimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in + curly brackets, it matches a literal "u". + + Note that \0dd is always an octal code. The treatment of backslash fol- + lowed by a non-zero digit is complicated; for details see the section + "Non-printing characters" in the pcre2pattern documentation, where de- + tails of escape processing in EBCDIC environments are also given. + + +CHARACTER TYPES + + . 
any character except newline; + in dotall mode, any character whatsoever + \C one code unit, even in UTF mode (best avoided) + \d a decimal digit + \D a character that is not a decimal digit + \h a horizontal white space character + \H a character that is not a horizontal white space character + \N a character that is not a newline + \p{xx} a character with the xx property + \P{xx} a character without the xx property + \R a newline sequence + \s a white space character + \S a character that is not a white space character + \v a vertical white space character + \V a character that is not a vertical white space character + \w a "word" character + \W a "non-word" character + \X a Unicode extended grapheme cluster + + \C is dangerous because it may leave the current matching point in the + middle of a UTF-8 or UTF-16 character. The application can lock out the + use of \C by setting the PCRE2_NEVER_BACKSLASH_C option. It is also + possible to build PCRE2 with the use of \C permanently disabled. + + By default, \d, \s, and \w match only ASCII characters, even in UTF-8 + mode or in the 16-bit and 32-bit libraries. However, if locale-specific + matching is happening, \s and \w may also match characters with code + points in the range 128-255. If the PCRE2_UCP option is set, the behav- + iour of these escape sequences is changed to use Unicode properties and + they match many more characters, but there are some option settings + that can restrict individual sequences to matching only ASCII charac- + ters. + + Property descriptions in \p and \P are matched caselessly; hyphens, un- + derscores, and ASCII white space characters are ignored, in accordance + with Unicode's "loose matching" rules. For example, \p{Bidi_Class=al} + is the same as \p{ bidi class = AL }. 
+ + +GENERAL CATEGORY PROPERTIES FOR \p and \P + + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate + + L Letter + Lc Cased letter, the union of Ll, Lu, and Lt + L& Synonym of Lc + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter + + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark + + N Number + Nd Decimal number + Nl Letter number + No Other number + + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation + + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol + + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator + + From release 10.45, when caseless matching is set, Ll, Lu, and Lt are + all equivalent to Lc. + + +PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P + + Xan Alphanumeric: union of properties L and N + Xps POSIX space: property Z or tab, NL, VT, FF, CR + Xsp Perl space: property Z or tab, NL, VT, FF, CR + Xuc Universally-named character: one that can be + represented by a Universal Character Name + Xwd Perl word: property Xan or underscore + + Perl and POSIX space are now the same. Perl added VT to its space char- + acter set at release 5.18. + + +BINARY PROPERTIES FOR \p AND \P + + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by + running this command: + + pcre2test -LP + + +SCRIPT MATCHING WITH \p AND \P + + Many script names and their 4-letter abbreviations are recognized in + \p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P + of course). 
You can obtain a list of these scripts by running this com- + mand: + + pcre2test -LS + + +THE BIDI_CLASS PROPERTY FOR \p AND \P + + \p{Bidi_Class:<class>} matches a character with the given class + \p{BC:<class>} matches a character with the given class + + The recognized classes are: + + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space + + +CHARACTER CLASSES + + [...] positive character class + [^...] negative character class + [x-y] range (can be used for hex characters) + [[:xxx:]] positive POSIX named set + [[:^xxx:]] negative POSIX named set + + alnum alphanumeric + alpha alphabetic + ascii 0-127 + blank space or tab + cntrl control character + digit decimal digit + graph printing, excluding space + lower lower case letter + print printing, including space + punct printing, excluding alphanumeric + space white space + upper upper case letter + word same as \w + xdigit hexadecimal digit + + In PCRE2, POSIX character set names recognize only ASCII characters by + default, but some of them use Unicode properties if PCRE2_UCP is set. + You can use \Q...\E inside a character class. + + When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes + may be used, allowing nested character classes, combined using set op- + erators. 
+ + [x&&[^y]] UTS#18 extended character class + + x||y set union (OR) + x&&y set intersection (AND) + x--y set difference (AND NOT) + x~~y set symmetric difference (XOR) + + +PERL EXTENDED CHARACTER CLASSES + + (?[...]) Perl extended character class + (?[\p{Thai} & \p{Nd}]) operators; whitespace ignored + (?[(x - y) & z]) parentheses for grouping + + (?[ [^3] & \p{Nd} ]) [...] is a nested ordinary class + (?[ [:alpha:] - [z] ]) POSIX set is allowed outside [...] + (?[ \d - [3] ]) backslash-escaped set is allowed outside + [...] + (?[ !\n & [:ascii:] ]) backslash-escaped character is allowed out- + side [...] + all other characters or ranges must be enclosed + in [...] + + x|y, x+y set union (OR) + x&y set intersection (AND) + x-y set difference (AND NOT) + x^y set symmetric difference (XOR) + !x set complement (NOT) + + Inside a Perl extended character class, [...] switches mode to be in- + terpreted as an ordinary character class. Outside of a nested [...], + the only items permitted are backslash-escapes, POSIX sets, operators, + and parentheses. Inside a nested ordinary class, ^ has its usual mean- + ing (inverts the class when used as the first character); outside of a + nested class, ^ is the XOR operator. + + +QUANTIFIERS + + ? 0 or 1, greedy + ?+ 0 or 1, possessive + ?? 0 or 1, lazy + * 0 or more, greedy + *+ 0 or more, possessive + *? 0 or more, lazy + + 1 or more, greedy + ++ 1 or more, possessive + +? 1 or more, lazy + {n} exactly n + {n,m} at least n, no more than m, greedy + {n,m}+ at least n, no more than m, possessive + {n,m}? at least n, no more than m, lazy + {n,} n or more, greedy + {n,}+ n or more, possessive + {n,}? n or more, lazy + {,m} zero up to m, greedy + {,m}+ zero up to m, possessive + {,m}? 
zero up to m, lazy + + +ANCHORS AND SIMPLE ASSERTIONS + + \b word boundary + \B not a word boundary + ^ start of subject + also after an internal newline in multiline mode + (after any newline if PCRE2_ALT_CIRCUMFLEX is set) + \A start of subject + $ end of subject + also before newline at end of subject + also before internal newline in multiline mode + \Z end of subject + also before newline at end of subject + \z end of subject + \G first matching position in subject + + +REPORTED MATCH POINT SETTING + + \K set reported start of match + + From release 10.38 \K is not permitted by default in lookaround asser- + tions, for compatibility with Perl. However, if the PCRE2_EXTRA_AL- + LOW_LOOKAROUND_BSK option is set, the previous behaviour is re-enabled. + When this option is set, \K is honoured in positive assertions, but ig- + nored in negative ones. + + +ALTERNATION + + expr|expr|expr... + + +CAPTURING + + (...) capture group + (?<name>...) named capture group (Perl) + (?'name'...) named capture group (Perl) + (?P<name>...) named capture group (Python) + (?:...) non-capture group + (?|...) non-capture group; reset group numbers for + capture groups in each alternative + + In non-UTF modes, names may contain underscores and ASCII letters and + digits; in UTF modes, any Unicode letters and Unicode decimal digits + are permitted. In both cases, a name must not start with a digit. + + +ATOMIC GROUPS + + (?>...) atomic non-capture group + (*atomic:...) atomic non-capture group + + +COMMENT + + (?#....) comment (not nestable) + + +OPTION SETTING + Changes of these options within a group are automatically cancelled at + the end of the group. 
+ + (?a) all ASCII options + (?aD) restrict \d to ASCII in UCP mode + (?aS) restrict \s to ASCII in UCP mode + (?aW) restrict \w to ASCII in UCP mode + (?aP) restrict all POSIX classes to ASCII in UCP mode + (?aT) restrict POSIX digit classes to ASCII in UCP mode + (?i) caseless + (?J) allow duplicate named groups + (?m) multiline + (?n) no auto capture + (?r) restrict caseless to either ASCII or non-ASCII + (?s) single line (dotall) + (?U) default ungreedy (lazy) + (?x) ignore white space except in classes or \Q...\E + (?xx) as (?x) but also ignore space and tab in classes + (?-...) unset the given option(s) + (?^) unset imnrsx options + + (?aP) implies (?aT) as well, though this has no additional effect. How- + ever, it means that (?-aP) also implies (?-aT) and disables all ASCII + restrictions for POSIX classes. + + Unsetting x or xx unsets both. Several options may be set at once, and + a mixture of setting and unsetting such as (?i-x) is allowed, but there + may be only one hyphen. Setting (but no unsetting) is allowed after (?^ + for example (?^in). An option setting may appear at the start of a non- + capture group, for example (?i:...). + + The following are recognized only at the very start of a pattern or af- + ter one of the newline or \R sequences or options with similar syntax. + More than one of them may appear. For the first three, d is a decimal + number. 
+ + (*LIMIT_DEPTH=d) set the backtracking limit to d + (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes + (*LIMIT_MATCH=d) set the match limit to d + (*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) + (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OP- + TIMIZE) + (*TURKISH_CASING) set PCRE2_EXTRA_TURKISH_CASING when matching + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \d + etc) + + Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the + value of the limits set by the caller of pcre2_match() or + pcre2_dfa_match(), not increase them. LIMIT_RECURSION is an obsolete + synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF) + and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, + respectively, at compile time. + + +NEWLINE CONVENTION + + These are recognized only at the very start of the pattern or after op- + tion settings with a similar syntax. + + (*CR) carriage return only + (*LF) linefeed only + (*CRLF) carriage return followed by linefeed + (*ANYCRLF) all three of the above + (*ANY) any Unicode newline sequence + (*NUL) the NUL character (binary zero) + + +WHAT \R MATCHES + + These are recognized only at the very start of the pattern or after op- + tion setting with a similar syntax. + + (*BSR_ANYCRLF) CR, LF, or CRLF + (*BSR_UNICODE) any Unicode newline sequence + + +LOOKAHEAD AND LOOKBEHIND ASSERTIONS + + (?=...) ) + (*pla:...) ) positive lookahead + (*positive_lookahead:...) ) + + (?!...) ) + (*nla:...) ) negative lookahead + (*negative_lookahead:...) ) + + (?<=...) ) + (*plb:...) ) positive lookbehind + (*positive_lookbehind:...) 
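) + + (?<!...) ) + (*nlb:...) ) negative lookbehind + (*negative_lookbehind:...) ) + + Each top-level branch of a lookbehind must have a limit for the number + of characters it matches. If any branch can match a variable number of + characters, the maximum for each branch is limited to a value set by + the caller of pcre2_compile() or defaulted. The default is set when + PCRE2 is built (ultimate default 255). If every branch matches a fixed + number of characters, the limit for each branch is 65535. + + +NON-ATOMIC LOOKAROUND ASSERTIONS + + These assertions are specific to PCRE2 and are not Perl-compatible. + + (?*...) ) + (*napla:...) ) synonyms + (*non_atomic_positive_lookahead:...) ) + + (?<*...) ) + (*naplb:...) ) synonyms + (*non_atomic_positive_lookbehind:...) ) + + +SUBSTRING SCAN ASSERTION + + This feature is not Perl-compatible. + + (*scan_substring:(grouplist)...) scan captured substring + (*scs:(grouplist)...) scan captured substring + + The comma-separated list may identify groups in any of the following + ways: + + n absolute reference + +n relative reference + -n relative reference + <name> name + 'name' name + + +SCRIPT RUNS + + (*script_run:...) ) script run, can be backtracked into + (*sr:...) ) + + (*atomic_script_run:...) ) atomic script run + (*asr:...) ) + + +BACKREFERENCES + + \n reference by number (can be ambiguous) + \gn reference by number + \g{n} reference by number + \g+n relative reference by number (PCRE2 extension) + \g-n relative reference by number + \g{+n} relative reference by number (PCRE2 extension) + \g{-n} relative reference by number + \k<name> reference by name (Perl) + \k'name' reference by name (Perl) + \g{name} reference by name (Perl) + \k{name} reference by name (.NET) + (?P=name) reference by name (Python) + + +SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) + + (?R) recurse whole pattern + (?n) call subroutine by absolute number + (?+n) call subroutine by relative number + (?-n) call subroutine by relative number + (?&name) call subroutine by name (Perl) + (?P>name) call subroutine by name (Python) + \g<name> call subroutine by name (Oniguruma) + \g'name' call subroutine by name (Oniguruma) + \g<n> call subroutine by absolute number (Oniguruma) + \g'n' call subroutine by absolute number (Oniguruma) + \g<+n> call subroutine by relative number (PCRE2 extension) + \g'+n' call subroutine by relative number (PCRE2 extension) + \g<-n> call subroutine by relative number (PCRE2 extension) + \g'-n' call subroutine by relative number (PCRE2 extension) + + +CONDITIONAL PATTERNS + + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) + + (?(n) absolute reference condition + (?(+n) relative reference condition (PCRE2 extension) + (?(-n) relative reference condition (PCRE2 extension) + (?(<name>) named reference condition (Perl) + (?('name') named reference condition (Perl) + (?(name) named reference condition (PCRE2, deprecated) + (?(R) overall recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition + (?(DEFINE) define groups for reference + (?(VERSION[>]=n.m) test 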
PCRE2 version + (?(assert) assertion condition + + Note the ambiguity of (?(R) and (?(Rn) which might be named reference + conditions or recursion tests. Such a condition is interpreted as a + reference condition if the relevant named group exists. + + +BACKTRACKING CONTROL + + All backtracking control verbs may be in the form (*VERB:NAME). For + (*MARK) the name is mandatory, for the others it is optional. (*SKIP) + changes its behaviour if :NAME is present. The others just set a name + for passing back to the caller, but this is not a name that (*SKIP) can + see. The following act immediately they are reached: + + (*ACCEPT) force successful match + (*FAIL) force backtrack; synonym (*F) + (*MARK:NAME) set name to be passed back; synonym (*:NAME) + + The following act only when a subsequent match failure causes a back- + track to reach them. They all force a match failure, but they differ in + what happens afterwards. Those that advance the start-of-match point do + so only if the pattern is not anchored. + + (*COMMIT) overall failure, no advance of starting point + (*PRUNE) advance to next starting character + (*SKIP) advance to current matching position + (*SKIP:NAME) advance to position corresponding to an earlier + (*MARK:NAME); if not found, the (*SKIP) is ignored + (*THEN) local failure, backtrack to next alternation + + The effect of one of these verbs in a group called as a subroutine is + confined to the subroutine call. + + +CALLOUTS + + (?C) callout (assumed number 0) + (?Cn) callout with numerical data n + (?C"text") callout with string data + + The allowed string delimiters are ` ' " ^ % # $ (which are the same for + the start and the end), and the starting delimiter { matched with the + ending delimiter }. To encode the ending delimiter within the string, + double it. + + +REPLACEMENT STRINGS + + If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for + pcre2_substitute() is not interpreted. 
Otherwise, by default, the only + special character is the dollar character in one of the following + forms: + + $$ insert a dollar character + $n or ${n} insert the contents of group n + $<name> insert the contents of named group + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string + $*MARK or ${*MARK} insert a control verb name + + For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is + set, there is additional interpretation: + + 1. Backslash is an escape character, and the forms described in "ES- + CAPED CHARACTERS" above are recognized. Also: + + \Q...\E can be used to suppress interpretation + \l force the next character to lower case + \u force the next character to upper case + \L force subsequent characters to lower case + \U force subsequent characters to upper case + \u\L force next character to upper case, then all lower + \l\U force next character to lower case, then all upper + \E end \L or \U case forcing + \b backspace character (note: as in character class in pat- + tern) + \v vertical tab character (note: not the same as in a pattern) + + 2. The Python form \g<n>, where the angle brackets are part of the syn- + tax and n is either a group name or a number, is recognized as an al- + ternative way of inserting the contents of a group, for example \g<3>. + + 3. Capture substitution supports the following additional forms: + + ${n:-string} default for unset group + ${n:+string1:string2} values for set/unset group + + The substitution strings themselves are expanded. Backslash can be used + to escape colons and closing curly brackets. + + +SEE ALSO + + pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), + pcre2(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. 
+ + +REVISION + + Last updated: 27 November 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 27 November 2024 PCRE2SYNTAX(3) +------------------------------------------------------------------------------ + + +PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) + + +NAME + PCRE2 - Perl-compatible regular expressions (revised API) + + +UNICODE AND UTF SUPPORT + + PCRE2 is normally built with Unicode support, though if you do not need + it, you can build it without, in which case the library will be + smaller. With Unicode support, PCRE2 has knowledge of Unicode character + properties and can process strings of text in UTF-8, UTF-16, and UTF-32 + format (depending on the code unit width), but this is not the default. + Unless specifically requested, PCRE2 treats each code unit in a string + as one character. + + There are two ways of telling PCRE2 to switch to UTF mode, where char- + acters may consist of more than one code unit and the range of values + is constrained. The program can call pcre2_compile() with the PCRE2_UTF + option, or the pattern may start with the sequence (*UTF). However, + the latter facility can be locked out by the PCRE2_NEVER_UTF option. + That is, the programmer can prevent the supplier of the pattern from + switching to UTF mode. + + Note that the PCRE2_MATCH_INVALID_UTF option (see below) forces + PCRE2_UTF to be set. + + In UTF mode, both the pattern and any subject strings that are matched + against it are treated as UTF strings instead of strings of individual + one-code-unit characters. There are also some other changes to the way + characters are handled, as documented below. + + +UNICODE PROPERTY SUPPORT + + When PCRE2 is built with Unicode support, the escape sequences \p{..}, + \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF set- + ting. The Unicode properties that can be tested are a subset of those + that Perl supports. 
Currently they are limited to the general category + properties such as Lu for an upper case letter or Nd for a decimal num- + ber, the derived properties Any and Lc (synonym L&), the Unicode script + names such as Arabic or Han, Bidi_Class, Bidi_Control, and a few binary + properties. + + The full lists are given in the pcre2pattern and pcre2syntax documenta- + tion. In general, only the short names for properties are supported. + For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is + not supported. Furthermore, in Perl, many properties may optionally be + prefixed by "Is", for compatibility with Perl 5.6. PCRE2 does not sup- + port this. + + +WIDE CHARACTERS AND UTF MODES + + Code points less than 256 can be specified in patterns by either braced + or unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). + Larger values have to use braced sequences. Unbraced octal code points + up to \777 are also recognized; larger ones can be coded using \o{...}. + + The escape sequence \N{U+} is recognized as another way of + specifying a Unicode character by code point in a UTF mode. It is not + allowed in non-UTF mode. + + In UTF mode, repeat quantifiers apply to complete UTF characters, not + to individual code units. + + In UTF mode, the dot metacharacter matches one UTF character instead of + a single code unit. + + In UTF mode, capture group names are not restricted to ASCII, and may + contain any Unicode letters and decimal digits, as well as underscore. + + The escape sequence \C can be used to match a single code unit in UTF + mode, but its use can lead to some strange effects because it breaks up + multi-unit characters (see the description of \C in the pcre2pattern + documentation). For this reason, there is a build-time option that dis- + ables support for \C completely. There is also a less draconian com- + pile-time option for locking out the use of \C when a pattern is com- + piled. 
+ + The use of \C is not supported by the alternative matching function + pcre2_dfa_match() when in UTF-8 or UTF-16 mode, that is, when a charac- + ter may consist of more than one code unit. The use of \C in these + modes provokes a match-time error. Also, the JIT optimization does not + support \C in these modes. If JIT optimization is requested for a UTF-8 + or UTF-16 pattern that contains \C, it will not succeed, and so when + pcre2_match() is called, the matching will be carried out by the inter- + pretive function. + + The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly test + characters of any code value, but, by default, the characters that + PCRE2 recognizes as digits, spaces, or word characters remain the same + set as in non-UTF mode, all with code points less than 256. This re- + mains true even when PCRE2 is built to include Unicode support, because + to do otherwise would slow down matching in many common cases. Note + that this also applies to \b and \B, because they are defined in terms + of \w and \W. If you want to test for a wider sense of, say, "digit", + you can use explicit Unicode property tests such as \p{Nd}. Alterna- + tively, if you set the PCRE2_UCP option, the way that the character es- + capes work is changed so that Unicode properties are used to determine + which characters match, though there are some options that suppress + this for individual escapes. For details see the section on generic + character types in the pcre2pattern documentation. + + Like the escapes, characters that match the POSIX named character + classes are all low-valued characters unless the PCRE2_UCP option is + set, but there is an option to override this. + + In contrast to the character escapes and character classes, the special + horizontal and vertical white space escapes (\h, \H, \v, and \V) do + match all the appropriate Unicode characters, whether or not PCRE2_UCP + is set. 
+ + +UNICODE CASE-EQUIVALENCE + + If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing + makes use of Unicode properties except for characters whose code points + are less than 128 and that have at most two case-equivalent values. For + these, a direct table lookup is used for speed. A few Unicode charac- + ters such as Greek sigma have more than two code points that are case- + equivalent, and these are treated specially. Setting PCRE2_UCP without + PCRE2_UTF allows Unicode-style case processing for non-UTF character + encodings such as UCS-2. + + There are two ASCII characters (S and K) that, in addition to their + ASCII lower case equivalents, have a non-ASCII one as well (long S and + Kelvin sign). Recognition of these non-ASCII characters as case-equiv- + alent to their ASCII counterparts can be disabled by setting the + PCRE2_EXTRA_CASELESS_RESTRICT option. When this is set, all characters + in a case equivalence must either be ASCII or non-ASCII; there can be + no mixing. + + Without PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' = U+212A (Kelvin sign) + 's' = 'S' = U+017F (long S) + With PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' + U+212A (Kelvin sign) only case-equivalent to itself + 's' = 'S' + U+017F (long S) only case-equivalent to itself + + One language family, Turkish and Azeri, has its own case-insensitivity + rules, which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. + This alters the behaviour of the 'i', 'I', U+0130 (capital I with dot + above), and U+0131 (small dotless i) characters. + + Without PCRE2_EXTRA_TURKISH_CASING: + 'i' = 'I' + U+0130 (capital I with dot above) only case-equivalent to itself + U+0131 (small dotless i) only case-equivalent to itself + With PCRE2_EXTRA_TURKISH_CASING: + 'i' = U+0130 (capital I with dot above) + U+0131 (small dotless i) = 'I' + + It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and + PCRE2_EXTRA_TURKISH_CASING together. 
+ + From release 10.45 the Unicode letter properties Lu (upper case), Ll + (lower case), and Lt (title case) are all treated as Lc (cased letter) + when caseless matching is set by the PCRE2_CASELESS option or (?i) + within the pattern. + + +SCRIPT RUNS + + The pattern constructs (*script_run:...) and (*atomic_script_run:...), + with synonyms (*sr:...) and (*asr:...), verify that the string matched + within the parentheses is a script run. In concept, a script run is a + sequence of characters that are all from the same Unicode script. How- + ever, because some scripts are commonly used together, and because some + diacritical and other marks are used with multiple scripts, it is not + that simple. + + Every Unicode character has a Script property, mostly with a value cor- + responding to the name of a script, such as Latin, Greek, or Cyrillic. + There are also three special values: + + "Unknown" is used for code points that have not been assigned, and also + for the surrogate code points. In the PCRE2 32-bit library, characters + whose code points are greater than the Unicode maximum (U+10FFFF), + which are accessible only in non-UTF mode, are assigned the Unknown + script. + + "Common" is used for characters that are used with many scripts. These + include punctuation, emoji, mathematical, musical, and currency sym- + bols, and the ASCII digits 0 to 9. + + "Inherited" is used for characters such as diacritical marks that mod- + ify a previous character. These are considered to take on the script of + the character that they modify. + + Some Inherited characters are used with many scripts, but many of them + are only normally used with a small number of scripts. For example, + U+102E0 (Coptic Epact thousands mark) is used only with Arabic and Cop- + tic. In order to make it possible to check this, a Unicode property + called Script Extension exists. Its value is a list of scripts that ap- + ply to the character. 
For the majority of characters, the list contains + just one script, the same one as the Script property. However, for + characters such as U+102E0 more than one Script is listed. There are + also some Common characters that have a single, non-Common script in + their Script Extension list. + + The next section describes the basic rules for deciding whether a given + string of characters is a script run. Note, however, that there are + some special cases involving the Chinese Han script, and an additional + constraint for decimal digits. These are covered in subsequent sec- + tions. + + Basic script run rules + + A string that is less than two characters long is a script run. This is + the only case in which an Unknown character can be part of a script + run. Longer strings are checked using only the Script Extensions prop- + erty, not the basic Script property. + + If a character's Script Extension property is the single value "Inher- + ited", it is always accepted as part of a script run. This is also true + for the property "Common", subject to the checking of decimal digits + described below. All the remaining characters in a script run must have + at least one script in common in their Script Extension lists. In set- + theoretic terminology, the intersection of all the sets of scripts must + not be empty. + + A simple example is an Internet name such as "google.com". The letters + are all in the Latin script, and the dot is Common, so this string is a + script run. However, the Cyrillic letter "o" looks exactly the same as + the Latin "o"; a string that looks the same, but with Cyrillic "o"s is + not a script run. + + More interesting examples involve characters with more than one script + in their Script Extension. Consider the following characters: + + U+060C Arabic comma + U+06D4 Arabic full stop + + The first has the Script Extension list Arabic, Hanifi Rohingya, Syr- + iac, and Thaana; the second has just Arabic and Hanifi Rohingya. 
Both + of them could appear in script runs of either Arabic or Hanifi Ro- + hingya. The first could also appear in Syriac or Thaana script runs, + but the second could not. + + The Chinese Han script + + The Chinese Han script is commonly used in conjunction with other + scripts for writing certain languages. Japanese uses the Hiragana and + Katakana scripts together with Han; Korean uses Hangul and Han; Tai- + wanese Mandarin uses Bopomofo and Han. These three combinations are + treated as special cases when checking script runs and are, in effect, + "virtual scripts". Thus, a script run may contain a mixture of Hira- + gana, Katakana, and Han, or a mixture of Hangul and Han, or a mixture + of Bopomofo and Han, but not, for example, a mixture of Hangul and + Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical Stan- + dard 39 ("Unicode Security Mechanisms", http://unicode.org/re- + ports/tr39/) in allowing such mixtures. + + Decimal digits + + Unicode contains many sets of 10 decimal digits in different scripts, + and some scripts (including the Common script) contain more than one + set. Some of these decimal digits are visually indistinguishable + from the common ASCII digits. In addition to the script checking de- + scribed above, if a script run contains any decimal digits, they must + all come from the same set of 10 adjacent characters. + + +VALIDITY OF UTF STRINGS + + When the PCRE2_UTF option is set, the strings passed as patterns and + subjects are (by default) checked for validity on entry to the relevant + functions. If an invalid UTF string is passed, a negative error code is + returned. The code unit offset to the offending character can be ex- + tracted from the match data block by calling pcre2_get_startchar(), + which is used for this purpose after a UTF error.
+ + In some situations, you may already know that your strings are valid, + and therefore want to skip these checks in order to improve perfor- + mance, for example in the case of a long subject string that is being + scanned repeatedly. If you set the PCRE2_NO_UTF_CHECK option at com- + pile time or at match time, PCRE2 assumes that the pattern or subject + it is given (respectively) contains only valid UTF code unit sequences. + + If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the + result is undefined and your program may crash or loop indefinitely or + give incorrect results. There is, however, one mode of matching that + can handle invalid UTF subject strings. This is enabled by passing + PCRE2_MATCH_INVALID_UTF to pcre2_compile() and is discussed below in + the next section. The rest of this section covers the case when + PCRE2_MATCH_INVALID_UTF is not set. + + Passing PCRE2_NO_UTF_CHECK to pcre2_compile() just disables the UTF + check for the pattern; it does not also apply to subject strings. If + you want to disable the check for a subject string you must pass this + same option to pcre2_match() or pcre2_dfa_match(). + + UTF-16 and UTF-32 strings can indicate their endianness by a special code + known as a byte-order mark (BOM). The PCRE2 functions do not handle + this, expecting strings to be in host byte order. + + Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any + other processing takes place. In the case of pcre2_match() and + pcre2_dfa_match() calls with a non-zero starting offset, the check is + applied only to that part of the subject that could be inspected during + matching, and there is a check that the starting offset points to the + first code unit of a character or to the end of the subject. If there + are no lookbehind assertions in the pattern, the check starts at the + starting offset.
Otherwise, it starts at the length of the longest + lookbehind before the starting offset, or at the start of the subject + if there are not that many characters before the starting offset. Note + that the sequences \b and \B are one-character lookbehinds. + + In addition to checking the format of the string, there is a check to + ensure that all code points lie in the range U+0 to U+10FFFF, excluding + the surrogate area. The so-called "non-character" code points are not + excluded because Unicode corrigendum #9 makes it clear that they should + not be. + + Characters in the "Surrogate Area" of Unicode are reserved for use by + UTF-16, where they are used in pairs to encode code points with values + greater than 0xFFFF. The code points that are encoded by UTF-16 pairs + are available independently in the UTF-8 and UTF-32 encodings. (In + other words, the whole surrogate thing is a fudge for UTF-16 which un- + fortunately messes up UTF-8 and UTF-32.) + + Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error + that is given if an escape sequence for an invalid Unicode code point + is encountered in the pattern. If you want to allow escape sequences + such as \x{d800} (a surrogate code point) you can set the PCRE2_EX- + TRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible + only in UTF-8 and UTF-32 modes, because these values are not repre- + sentable in UTF-16. + + Errors in UTF-8 strings + + The following negative error codes are given for invalid UTF-8 strings: + + PCRE2_ERROR_UTF8_ERR1 + PCRE2_ERROR_UTF8_ERR2 + PCRE2_ERROR_UTF8_ERR3 + PCRE2_ERROR_UTF8_ERR4 + PCRE2_ERROR_UTF8_ERR5 + + The string ends with a truncated UTF-8 character; the code specifies + how many bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 + characters to be no longer than 4 bytes, the encoding scheme (origi- + nally defined by RFC 2279) allows for up to 6 bytes, and this is + checked first; hence the possibility of 4 or 5 missing bytes. 
+ + PCRE2_ERROR_UTF8_ERR6 + PCRE2_ERROR_UTF8_ERR7 + PCRE2_ERROR_UTF8_ERR8 + PCRE2_ERROR_UTF8_ERR9 + PCRE2_ERROR_UTF8_ERR10 + + The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of + the character do not have the binary value 0b10 (that is, either the + most significant bit is 0, or the next bit is 1). + + PCRE2_ERROR_UTF8_ERR11 + PCRE2_ERROR_UTF8_ERR12 + + A character that is valid by the RFC 2279 rules is either 5 or 6 bytes + long; these code points are excluded by RFC 3629. + + PCRE2_ERROR_UTF8_ERR13 + + A 4-byte character has a value greater than 0x10ffff; these code points + are excluded by RFC 3629. + + PCRE2_ERROR_UTF8_ERR14 + + A 3-byte character has a value in the range 0xd800 to 0xdfff; this + range of code points are reserved by RFC 3629 for use with UTF-16, and + so are excluded from UTF-8. + + PCRE2_ERROR_UTF8_ERR15 + PCRE2_ERROR_UTF8_ERR16 + PCRE2_ERROR_UTF8_ERR17 + PCRE2_ERROR_UTF8_ERR18 + PCRE2_ERROR_UTF8_ERR19 + + A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes + for a value that can be represented by fewer bytes, which is invalid. + For example, the two bytes 0xc0, 0xae give the value 0x2e, whose cor- + rect coding uses just one byte. + + PCRE2_ERROR_UTF8_ERR20 + + The two most significant bits of the first byte of a character have the + binary value 0b10 (that is, the most significant bit is 1 and the sec- + ond is 0). Such a byte can only validly occur as the second or subse- + quent byte of a multi-byte character. + + PCRE2_ERROR_UTF8_ERR21 + + The first byte of a character has the value 0xfe or 0xff. These values + can never occur in a valid UTF-8 string. 
+ + Errors in UTF-16 strings + + The following negative error codes are given for invalid UTF-16 + strings: + + PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string + PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate + + + Errors in UTF-32 strings + + The following negative error codes are given for invalid UTF-32 + strings: + + PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) + PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff + + +MATCHING IN INVALID UTF STRINGS + + You can run pattern matches on subject strings that may contain invalid + UTF sequences if you call pcre2_compile() with the PCRE2_MATCH_IN- + VALID_UTF option. This is supported by pcre2_match(), including JIT + matching, but not by pcre2_dfa_match(). When PCRE2_MATCH_INVALID_UTF is + set, it forces PCRE2_UTF to be set as well. Note, however, that the + pattern itself must be a valid UTF string. + + If you do not set PCRE2_MATCH_INVALID_UTF when calling pcre2_compile, + and you are not certain that your subject strings are valid UTF se- + quences, you should not make use of the JIT "fast path" function + pcre2_jit_match() because it bypasses sanity checks, including the one + for UTF validity. An invalid string may cause undefined behaviour, in- + cluding looping, crashing, or giving the wrong answer. + + Setting PCRE2_MATCH_INVALID_UTF does not affect what pcre2_compile() + generates, but if pcre2_jit_compile() is subsequently called, it does + generate different code. If JIT is not used, the option affects the be- + haviour of the interpretive code in pcre2_match(). When PCRE2_MATCH_IN- + VALID_UTF is set at compile time, PCRE2_NO_UTF_CHECK is ignored at + match time. + + In this mode, an invalid code unit sequence in the subject never + matches any pattern item. It does not match dot, it does not match + \p{Any}, it does not even match negative items such as [^X]. 
A lookbe- + hind assertion fails if it encounters an invalid sequence while moving + the current point backwards. In other words, an invalid UTF code unit + sequence acts as a barrier which no match can cross. + + You can also think of this as the subject being split up into fragments + of valid UTF, delimited internally by invalid code unit sequences. The + pattern is matched fragment by fragment. The result of a successful + match, however, is given as code unit offsets in the entire subject + string in the usual way. There are a few points to consider: + + The internal boundaries are not interpreted as the beginnings or ends + of lines and so do not match circumflex or dollar characters in the + pattern. + + If pcre2_match() is called with an offset that points to an invalid + UTF-sequence, that sequence is skipped, and the match starts at the + next valid UTF character, or the end of the subject. + + At internal fragment boundaries, \b and \B behave in the same way as at + the beginning and end of the subject. For example, a sequence such as + \bWORD\b would match an instance of WORD that is surrounded by invalid + UTF code units. + + Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbi- + trary data, knowing that any matched strings that are returned are + valid UTF. This can be useful when searching for UTF text in executable + or other binary files. + + Note, however, that the 16-bit and 32-bit PCRE2 libraries process + strings as sequences of uint16_t or uint32_t code points. They cannot + find valid UTF sequences within an arbitrary string of bytes unless + such sequences are suitably aligned. + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 27 November 2024 + Copyright (c) 1997-2024 University of Cambridge. 
+ + +PCRE2 10.45 27 November 2024 PCRE2UNICODE(3) +------------------------------------------------------------------------------ + + diff --git a/3rd/pcre2/doc/pcre2_callout_enumerate.3 b/3rd/pcre2/doc/pcre2_callout_enumerate.3 new file mode 100644 index 00000000..0d41eca2 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_callout_enumerate.3 @@ -0,0 +1,51 @@ +.TH PCRE2_CALLOUT_ENUMERATE 3 "23 March 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function scans a compiled regular expression and calls the \fIcallback()\fP +function for each callout within the pattern. The yield of the function is zero +for success and non-zero otherwise. The arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIcallback\fP The callback function + \fIcallout_data\fP User data that is passed to the callback +.sp +The \fIcallback()\fP function is passed a pointer to a data block containing +the following fields (not necessarily in this order): +.sp + uint32_t \fIversion\fP Block version number + uint32_t \fIcallout_number\fP Number for numbered callouts + PCRE2_SIZE \fIpattern_position\fP Offset to next item in pattern + PCRE2_SIZE \fInext_item_length\fP Length of next item in pattern + PCRE2_SIZE \fIcallout_string_offset\fP Offset to string within pattern + PCRE2_SIZE \fIcallout_string_length\fP Length of callout string + PCRE2_SPTR \fIcallout_string\fP Points to callout string or is NULL +.sp +The second argument passed to the \fBcallback()\fP function is the callout data +that was passed to \fBpcre2_callout_enumerate()\fP. The \fBcallback()\fP +function must return zero for success.
Any other value causes the pattern scan +to stop, with the value being passed back as the result of +\fBpcre2_callout_enumerate()\fP. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_code_copy.3 b/3rd/pcre2/doc/pcre2_code_copy.3 new file mode 100644 index 00000000..97fbea13 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_code_copy.3 @@ -0,0 +1,31 @@ +.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. The +pointer to the character tables is copied, not the tables themselves (see +\fBpcre2_code_copy_with_tables()\fP). The yield of the function is NULL if +\fIcode\fP is NULL or if sufficient memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_code_copy_with_tables.3 b/3rd/pcre2/doc/pcre2_code_copy_with_tables.3 new file mode 100644 index 00000000..9cb62993 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_code_copy_with_tables.3 @@ -0,0 +1,32 @@ +.TH PCRE2_CODE_COPY_WITH_TABLES 3 "16 January 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.fi +.
+.SH DESCRIPTION +.rs +.sp +This function makes a copy of the memory used for a compiled pattern, excluding +any memory used by the JIT compiler. Without a subsequent call to +\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. +Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also +made, with the new code pointing to it. This memory will be automatically freed +when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if +\fIcode\fP is NULL or if sufficient memory cannot be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_code_free.3 b/3rd/pcre2/doc/pcre2_code_free.3 new file mode 100644 index 00000000..2d979bf9 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_code_free.3 @@ -0,0 +1,30 @@ +.TH PCRE2_CODE_FREE 3 "28 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +If \fIcode\fP is NULL, this function does nothing. Otherwise, \fIcode\fP must +point to a compiled pattern. This function frees its memory, including any +memory used by the JIT compiler. If the compiled pattern was created by a call +to \fBpcre2_code_copy_with_tables()\fP, the memory for the character tables is +also freed. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_compile.3 b/3rd/pcre2/doc/pcre2_compile.3 new file mode 100644 index 00000000..29a3dbed --- /dev/null +++ b/3rd/pcre2/doc/pcre2_compile.3 @@ -0,0 +1,107 @@ +.TH PCRE2_COMPILE 3 "30 October 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_code *pcre2_compile(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" +.B " pcre2_compile_context *\fIccontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function compiles a regular expression pattern into an internal form. Its +arguments are: +.sp + \fIpattern\fP A string containing the expression to be compiled + \fIlength\fP The length of the string or PCRE2_ZERO_TERMINATED + \fIoptions\fP Primary option bits + \fIerrorcode\fP Where to put an error code + \fIerroroffset\fP Where to put an error offset + \fIccontext\fP Pointer to a compile context or NULL +.sp +The length of the pattern and any error offset that is returned are in code +units, not characters. A NULL pattern with zero length is treated as an empty +string. A compile context is needed only if you want to provide custom memory +allocation functions, or to provide an external function for system stack size +checking (see \fBpcre2_set_compile_recursion_guard()\fP), or to change one or +more of these parameters: +.sp + What \eR matches (Unicode newlines, or CR, LF, CRLF only); + PCRE2's character tables; + The newline character sequence; + The compile time nested parentheses limit; + The maximum pattern length (in code units) that is allowed; + The additional options bits.
+.sp +The primary option bits are: +.sp + PCRE2_ANCHORED Force pattern anchoring + PCRE2_ALLOW_EMPTY_CLASS Allow empty classes + PCRE2_ALT_BSUX Alternative handling of \eu, \eU, and \ex + PCRE2_ALT_CIRCUMFLEX Alternative handling of ^ in multiline mode + PCRE2_ALT_EXTENDED_CLASS Alternative extended character class syntax + PCRE2_ALT_VERBNAMES Process backslashes in verb names + PCRE2_AUTO_CALLOUT Compile automatic callouts + PCRE2_CASELESS Do caseless matching + PCRE2_DOLLAR_ENDONLY $ not to match newline at end + PCRE2_DOTALL . matches anything including NL + PCRE2_DUPNAMES Allow duplicate names for subpatterns + PCRE2_ENDANCHORED Pattern can match only at end of subject + PCRE2_EXTENDED Ignore white space and # comments + PCRE2_FIRSTLINE Force matching to be before newline + PCRE2_LITERAL Pattern characters are all literal + PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF + PCRE2_MATCH_UNSET_BACKREF Match unset backreferences + PCRE2_MULTILINE ^ and $ match newlines within data + PCRE2_NEVER_BACKSLASH_C Lock out the use of \eC in patterns + PCRE2_NEVER_UCP Lock out PCRE2_UCP, e.g. via (*UCP) + PCRE2_NEVER_UTF Lock out PCRE2_UTF, e.g. via (*UTF) + PCRE2_NO_AUTO_CAPTURE Disable numbered capturing paren- + theses (named ones available) + PCRE2_NO_AUTO_POSSESS Disable auto-possessification + PCRE2_NO_DOTSTAR_ANCHOR Disable automatic anchoring for .* + PCRE2_NO_START_OPTIMIZE Disable match-time start optimizations + PCRE2_NO_UTF_CHECK Do not check the pattern for UTF validity + (only relevant if PCRE2_UTF is set) + PCRE2_UCP Use Unicode properties for \ed, \ew, etc. + PCRE2_UNGREEDY Invert greediness of quantifiers + PCRE2_USE_OFFSET_LIMIT Enable offset limit for unanchored matching + PCRE2_UTF Treat pattern and subjects as UTF strings +.sp +PCRE2 must be built with Unicode support (the default) in order to use +PCRE2_UTF, PCRE2_UCP and related options. 
+.P +Additional options may be set in the compile context via the +.\" HREF +\fBpcre2_set_compile_extra_options\fP +.\" +function. +.P +If either of \fIerrorcode\fP or \fIerroroffset\fP is NULL, the function returns +NULL immediately. Otherwise, the yield of this function is a pointer to a +private data structure that contains the compiled pattern, or NULL if an error +was detected. In the error case, a text error message can be obtained by +passing the value returned via the \fIerrorcode\fP argument to the +\fBpcre2_get_error_message()\fP function. The offset (in code units) where the +error was encountered is returned via the \fIerroroffset\fP argument. +.P +If there is no error, the value passed via \fIerrorcode\fP returns the message +"no error" if passed to \fBpcre2_get_error_message()\fP, and the value passed +via \fIerroroffset\fP is zero. +.P +There is a complete description of the PCRE2 native API, with more detail on +each option, in the +.\" HREF +\fBpcre2api\fP +.\" +page, and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_compile_context_copy.3 b/3rd/pcre2/doc/pcre2_compile_context_copy.3 new file mode 100644 index 00000000..bdd36fd8 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_compile_context_copy.3 @@ -0,0 +1,29 @@ +.TH PCRE2_COMPILE_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_compile_context *pcre2_compile_context_copy( +.B " pcre2_compile_context *\fIccontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a new copy of a compile context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_compile_context_create.3 b/3rd/pcre2/doc/pcre2_compile_context_create.3 new file mode 100644 index 00000000..261f42d9 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_compile_context_create.3 @@ -0,0 +1,30 @@ +.TH PCRE2_COMPILE_CONTEXT_CREATE 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_compile_context *pcre2_compile_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates and initializes a new compile context. If its argument is +NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_compile_context_free.3 b/3rd/pcre2/doc/pcre2_compile_context_free.3 new file mode 100644 index 00000000..359f5934 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_compile_context_free.3 @@ -0,0 +1,29 @@ +.TH PCRE2_COMPILE_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory occupied by a compile context, using the memory +freeing function from the general context with which it was created, or +\fBfree()\fP if that was not set. If the argument is NULL, the function returns +immediately without doing anything. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_config.3 b/3rd/pcre2/doc/pcre2_config.3 new file mode 100644 index 00000000..68ff105f --- /dev/null +++ b/3rd/pcre2/doc/pcre2_config.3 @@ -0,0 +1,76 @@ +.TH PCRE2_CONFIG 3 "16 September 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP); +. +.SH DESCRIPTION +.rs +.sp +This function makes it possible for a client program to find out which optional +features are available in the version of the PCRE2 library it is using. The +arguments are as follows: +.sp + \fIwhat\fP A code specifying what information is required + \fIwhere\fP Points to where to put the information +.sp +If \fIwhere\fP is NULL, the function returns the amount of memory needed for +the requested information. When the information is a string, the value is in +code units; for other types of data it is in bytes. +.P +If \fIwhere\fP is not NULL, for PCRE2_CONFIG_JITTARGET, +PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a +buffer that is large enough to hold the string. For all other codes it must +point to a uint32_t integer variable.
The available codes are: +.sp + PCRE2_CONFIG_BSR Indicates what \eR matches by default: + PCRE2_BSR_UNICODE + PCRE2_BSR_ANYCRLF + PCRE2_CONFIG_COMPILED_WIDTHS Which of 8/16/32 support was compiled + PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit + PCRE2_CONFIG_HEAPLIMIT Default heap memory limit +.\" JOIN + PCRE2_CONFIG_JIT Availability of just-in-time compiler + support (1=yes 0=no) +.\" JOIN + PCRE2_CONFIG_JITTARGET Information (a string) about the target + architecture for the JIT compiler + PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4) + PCRE2_CONFIG_MATCHLIMIT Default internal resource limit + PCRE2_CONFIG_NEVER_BACKSLASH_C Whether or not \eC is disabled + PCRE2_CONFIG_NEWLINE Code for the default newline sequence: + PCRE2_NEWLINE_CR + PCRE2_NEWLINE_LF + PCRE2_NEWLINE_CRLF + PCRE2_NEWLINE_ANY + PCRE2_NEWLINE_ANYCRLF + PCRE2_NEWLINE_NUL + PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit + PCRE2_CONFIG_RECURSIONLIMIT Obsolete: use PCRE2_CONFIG_DEPTHLIMIT + PCRE2_CONFIG_STACKRECURSE Obsolete: always returns 0 +.\" JOIN + PCRE2_CONFIG_UNICODE Availability of Unicode support (1=yes + 0=no) + PCRE2_CONFIG_UNICODE_VERSION The Unicode version (a string) + PCRE2_CONFIG_VERSION The PCRE2 version (a string) +.sp +The function yields a non-negative value on success or the negative value +PCRE2_ERROR_BADOPTION otherwise. This is also the result for the +PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is +requested, the function returns the number of code units used, including the +terminating zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_convert_context_copy.3 b/3rd/pcre2/doc/pcre2_convert_context_copy.3 new file mode 100644 index 00000000..4e866eb1 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_convert_context_copy.3 @@ -0,0 +1,26 @@ +.TH PCRE2_CONVERT_CONTEXT_COPY 3 "12 July 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_convert_context *pcre2_convert_context_copy( +.B " pcre2_convert_context *\fIcvcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It makes a new copy of a convert context, using the memory allocation function +that was used for the original context. The result is NULL if the memory cannot +be obtained. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/3rd/pcre2/doc/pcre2_convert_context_create.3 b/3rd/pcre2/doc/pcre2_convert_context_create.3 new file mode 100644 index 00000000..dccc10ab --- /dev/null +++ b/3rd/pcre2/doc/pcre2_convert_context_create.3 @@ -0,0 +1,27 @@ +.TH PCRE2_CONVERT_CONTEXT_CREATE 3 "12 July 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_convert_context *pcre2_convert_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It creates and initializes a new convert context. If its argument is +NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/3rd/pcre2/doc/pcre2_convert_context_free.3 b/3rd/pcre2/doc/pcre2_convert_context_free.3 new file mode 100644 index 00000000..e3b73ac8 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_convert_context_free.3 @@ -0,0 +1,26 @@ +.TH PCRE2_CONVERT_CONTEXT_FREE 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a convert context, using the memory +freeing function from the general context with which it was created, or +\fBfree()\fP if that was not set. If the argument is NULL, the function returns +immediately without doing anything. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/3rd/pcre2/doc/pcre2_converted_pattern_free.3 b/3rd/pcre2/doc/pcre2_converted_pattern_free.3 new file mode 100644 index 00000000..551d7210 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_converted_pattern_free.3 @@ -0,0 +1,26 @@ +.TH PCRE2_CONVERTED_PATTERN_FREE 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It frees the memory occupied by a converted pattern that was obtained by +calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place +the converted pattern into newly obtained heap memory. If the argument is NULL, +the function returns immediately without doing anything. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/3rd/pcre2/doc/pcre2_dfa_match.3 b/3rd/pcre2/doc/pcre2_dfa_match.3 new file mode 100644 index 00000000..316a954e --- /dev/null +++ b/3rd/pcre2/doc/pcre2_dfa_match.3 @@ -0,0 +1,86 @@ +.TH PCRE2_DFA_MATCH 3 "31 August 2021" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_dfa_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP," +.B " int *\fIworkspace\fP, PCRE2_SIZE \fIwscount\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using an alternative matching algorithm that scans the subject string +just once (except when processing lookaround assertions). This function is +\fInot\fP Perl-compatible (the Perl-compatible matching function is +\fBpcre2_match()\fP). The arguments for this function are: +.sp + \fIcode\fP Points to the compiled pattern + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fImatch_data\fP Points to a match data block, for results + \fImcontext\fP Points to a match context, or is NULL + \fIworkspace\fP Points to a vector of ints used as working space + \fIwscount\fP Number of elements in the vector +.sp +The size of output vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of parentheses in the +pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the match +data block is therefore not advisable when using this function. +.P +A match context is needed only if you want to set up a callout function or +specify the heap limit or the match or the recursion depth limits. 
The +\fIlength\fP and \fIstartoffset\fP values are code units, not characters. The +options are: +.sp + PCRE2_ANCHORED Match only at the first position + PCRE2_COPY_MATCHED_SUBJECT + On success, make a private subject copy + PCRE2_ENDANCHORED Pattern can match only at end of subject + PCRE2_NOTBOL Subject is not the beginning of a line + PCRE2_NOTEOL Subject is not the end of a line + PCRE2_NOTEMPTY An empty string is not a valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check the subject for UTF + validity (only relevant if PCRE2_UTF + was set at compile time) +.\" JOIN + PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial + match even if there is a full match +.\" JOIN + PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial + match if no full matches are found + PCRE2_DFA_RESTART Restart after a partial match + PCRE2_DFA_SHORTEST Return only the shortest match +.sp +There are restrictions on what may appear in a pattern when using this matching +function. Details are given in the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. For details of partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +page. There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_general_context_copy.3 b/3rd/pcre2/doc/pcre2_general_context_copy.3 new file mode 100644 index 00000000..cd4fd3c0 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_general_context_copy.3 @@ -0,0 +1,30 @@ +.TH PCRE2_GENERAL_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_general_context *pcre2_general_context_copy( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function makes a new copy of a general context, using the memory +allocation functions in the context, if set, to get the necessary memory. +Otherwise \fBmalloc()\fP is used. The result is NULL if the memory cannot be +obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_general_context_create.3 b/3rd/pcre2/doc/pcre2_general_context_create.3 new file mode 100644 index 00000000..40d0ec6d --- /dev/null +++ b/3rd/pcre2/doc/pcre2_general_context_create.3 @@ -0,0 +1,32 @@ +.TH PCRE2_GENERAL_CONTEXT_CREATE 3 "23 January 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_general_context *pcre2_general_context_create( +.B " void *(*\fIprivate_malloc\fP)(size_t, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates and initializes a general context. The arguments define +custom memory management functions and a data value that is passed to them when +they are called. The \fBprivate_malloc()\fP function is used to get memory for +the context. If either of the first two arguments is NULL, the system memory +management function is used. The result is NULL if no memory could be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_general_context_free.3 b/3rd/pcre2/doc/pcre2_general_context_free.3 new file mode 100644 index 00000000..4c65b2a9 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_general_context_free.3 @@ -0,0 +1,28 @@ +.TH PCRE2_GENERAL_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory occupied by a general context, using the memory +freeing function within the context, if set. If the argument is NULL, the +function returns immediately without doing anything. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_get_error_message.3 b/3rd/pcre2/doc/pcre2_get_error_message.3 new file mode 100644 index 00000000..5ef62bd2 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_error_message.3 @@ -0,0 +1,39 @@ +.TH PCRE2_GET_ERROR_MESSAGE 3 "24 March 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function provides a textual error message for each PCRE2 error code. +Compilation errors are positive numbers; UTF formatting errors and matching +errors are negative numbers. The arguments are: +.sp + \fIerrorcode\fP an error code (positive or negative) + \fIbuffer\fP where to put the message + \fIbufflen\fP the length of the buffer (code units) +.sp +The function returns the length of the message in code units, excluding the +trailing zero, or the negative error code PCRE2_ERROR_NOMEMORY if the buffer is +too small. 
In this case, the returned message is truncated (but still with a +trailing zero). If \fIerrorcode\fP does not contain a recognized error code +number, the negative value PCRE2_ERROR_BADDATA is returned. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_get_mark.3 b/3rd/pcre2/doc/pcre2_get_mark.3 new file mode 100644 index 00000000..9f8e78ce --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_mark.3 @@ -0,0 +1,34 @@ +.TH PCRE2_GET_MARK 3 "13 January 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +After a call of \fBpcre2_match()\fP that was passed the match block that is +this function's argument, this function returns a pointer to the last (*MARK), +(*PRUNE), or (*THEN) name that was encountered during the matching process. The +name is zero-terminated, and is within the compiled pattern. The length of the +name is in the preceding code unit. If no name is available, NULL is returned. +.P +After a successful match, the name that is returned is the last one on the +matching path. After a failed match or a partial match, the last encountered +name is returned. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_get_match_data_heapframes_size.3 b/3rd/pcre2/doc/pcre2_get_match_data_heapframes_size.3 new file mode 100644 index 00000000..d1b9ba68 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_match_data_heapframes_size.3 @@ -0,0 +1,28 @@ +.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "18 January 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE pcre2_get_match_data_heapframes_size( +.B " pcre2_match_data *\fImatch_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the size, in bytes, of the heapframes data block that is +owned by its argument. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_get_match_data_size.3 b/3rd/pcre2/doc/pcre2_get_match_data_size.3 new file mode 100644 index 00000000..f2e2b27a --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_match_data_size.3 @@ -0,0 +1,27 @@ +.TH PCRE2_GET_MATCH_DATA_SIZE 3 "17 October 2019" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the size, in bytes, of the match data block that is its +argument. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_get_ovector_count.3 b/3rd/pcre2/doc/pcre2_get_ovector_count.3 new file mode 100644 index 00000000..c6d866cd --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_ovector_count.3 @@ -0,0 +1,27 @@ +.TH PCRE2_GET_OVECTOR_COUNT 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the number of pairs of offsets in the ovector that forms +part of the given match data block. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_get_ovector_pointer.3 b/3rd/pcre2/doc/pcre2_get_ovector_pointer.3 new file mode 100644 index 00000000..8c89eee6 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_ovector_pointer.3 @@ -0,0 +1,28 @@ +.TH PCRE2_GET_OVECTOR_POINTER 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns a pointer to the vector of offsets that forms part of the +given match data block. The number of pairs can be found by calling +\fBpcre2_get_ovector_count()\fP. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_get_startchar.3 b/3rd/pcre2/doc/pcre2_get_startchar.3 new file mode 100644 index 00000000..293913eb --- /dev/null +++ b/3rd/pcre2/doc/pcre2_get_startchar.3 @@ -0,0 +1,32 @@ +.TH PCRE2_GET_STARTCHAR 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +After a successful call of \fBpcre2_match()\fP that was passed the match block +that is this function's argument, this function returns the code unit offset of +the character at which the successful match started. For a non-partial match, +this can be different to the value of \fIovector[0]\fP if the pattern contains +the \eK escape sequence. After a partial match, however, this value is always +the same as \fIovector[0]\fP because \eK does not affect the result of a +partial match. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_jit_compile.3 b/3rd/pcre2/doc/pcre2_jit_compile.3 new file mode 100644 index 00000000..c3cc49a9 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_jit_compile.3 @@ -0,0 +1,61 @@ +.TH PCRE2_JIT_COMPILE 3 "22 August 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_jit_compile(pcre2_code *\fIcode\fP, uint32_t \fIoptions\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function requests JIT compilation, which, if the just-in-time compiler is +available, further processes a compiled pattern into machine code that executes +much faster than the \fBpcre2_match()\fP interpretive matching function. Full +details are given in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. 
+.P +The availability of JIT support can be tested by calling +\fBpcre2_jit_compile()\fP with a single option PCRE2_JIT_TEST_ALLOC (the +code argument is ignored, so a NULL value is accepted). Such a call +returns zero if JIT is available and has a working allocator. Otherwise +it returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate +executable memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not +compiled. +.P +Otherwise, the first argument must be a pointer that was returned by a +successful call to \fBpcre2_compile()\fP, and the second must contain one or +more of the following bits: +.sp + PCRE2_JIT_COMPLETE compile code for full matching + PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching + PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching +.sp +There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been +superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old +option is deprecated and may be removed in the future. +.P +The yield of the function when called with any of the three options above is 0 +for success, or a negative error code otherwise. In particular, +PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or if an unknown +bit is set in \fIoptions\fP. The function can also return PCRE2_ERROR_NOMEMORY +if JIT is unable to allocate executable memory for the compiler, even if it was +because of a system security restriction. In a few cases, the function may +return with PCRE2_ERROR_JIT_UNSUPPORTED for unsupported features. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_jit_free_unused_memory.3 b/3rd/pcre2/doc/pcre2_jit_free_unused_memory.3 new file mode 100644 index 00000000..2eb3e154 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_jit_free_unused_memory.3 @@ -0,0 +1,31 @@ +.TH PCRE2_JIT_FREE_UNUSED_MEMORY 3 "24 April 2020" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees unused JIT executable memory. The argument is a general +context, for custom memory management, or NULL for standard memory management. +JIT memory allocation retains some memory in order to improve future JIT +compilation speed. In low memory conditions, +\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be +freed. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_jit_match.3 b/3rd/pcre2/doc/pcre2_jit_match.3 new file mode 100644 index 00000000..0488a0a1 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_jit_match.3 @@ -0,0 +1,58 @@ +.TH PCRE2_JIT_MATCH 3 "20 January 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression that has been successfully +processed by the JIT compiler against a given subject string, using a matching +algorithm that is similar to Perl's. 
It is a "fast path" interface to JIT, and +it bypasses some of the sanity checks that \fBpcre2_match()\fP applies. +.P +In UTF mode, the subject string is not checked for UTF validity. Unless +PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an +invalid UTF string results in undefined behaviour. Your program may crash or +loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should only call \fBpcre2_jit_match()\fP in UTF mode if you are sure the +subject is valid. +.P +The arguments for \fBpcre2_jit_match()\fP are exactly the same as for +.\" HREF +\fBpcre2_match()\fP, +.\" +except that the subject string must be specified with a length; +PCRE2_ZERO_TERMINATED is not supported. +.P +The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported +options are ignored. +.P +The return values are the same as for \fBpcre2_match()\fP plus +PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested +that was not compiled. For details of partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the JIT API in the +.\" HREF +\fBpcre2jit\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_jit_stack_assign.3 b/3rd/pcre2/doc/pcre2_jit_stack_assign.3 new file mode 100644 index 00000000..8cb0beaa --- /dev/null +++ b/3rd/pcre2/doc/pcre2_jit_stack_assign.3 @@ -0,0 +1,59 @@ +.TH PCRE2_JIT_STACK_ASSIGN 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, +.B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function provides control over the memory used by JIT as a run-time stack +when \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP is called with a pattern +that has been successfully processed by the JIT compiler. The information that +determines which stack is used is put into a match context that is subsequently +passed to a matching function. The arguments of this function are: +.sp + mcontext a pointer to a match context + callback a callback function + callback_data a JIT stack or a value to be passed to the callback +.P +If \fImcontext\fP is NULL, the function returns immediately, without doing +anything. +.P +If \fIcallback\fP is NULL and \fIcallback_data\fP is NULL, an internal 32KiB +block on the machine stack is used. +.P +If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL, +\fIcallback_data\fP must be a valid JIT stack, the result of calling +\fBpcre2_jit_stack_create()\fP. +.P +If \fIcallback\fP is not NULL, it is called with \fIcallback_data\fP as an +argument at the start of matching, in order to set up a JIT stack. If the +result is NULL, the internal 32KiB stack is used; otherwise the return value +must be a valid JIT stack, the result of calling +\fBpcre2_jit_stack_create()\fP. +.P +You may safely use the same JIT stack for multiple patterns, as long as they +are all matched in the same thread. In a multithread application, each thread +must use its own JIT stack. For more details, see the +.\" HREF +\fBpcre2jit\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_jit_stack_create.3 b/3rd/pcre2/doc/pcre2_jit_stack_create.3 new file mode 100644 index 00000000..768368ad --- /dev/null +++ b/3rd/pcre2/doc/pcre2_jit_stack_create.3 @@ -0,0 +1,40 @@ +.TH PCRE2_JIT_STACK_CREATE 3 "23 January 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP, +.B " size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is used to create a stack for use by the code compiled by the JIT +compiler. The first two arguments are a starting size for the stack, and a +maximum size to which it is allowed to grow. The final argument is a general +context, for memory allocation functions, or NULL for standard memory +allocation. The result can be passed to the JIT run-time code by calling +\fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern, +which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP. +A maximum stack size of 512KiB to 1MiB should be more than enough for any +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the +.\" HREF +\fBpcre2jit\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_jit_stack_free.3 b/3rd/pcre2/doc/pcre2_jit_stack_free.3 new file mode 100644 index 00000000..6a102548 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_jit_stack_free.3 @@ -0,0 +1,32 @@ +.TH PCRE2_JIT_STACK_FREE 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B void pcre2_jit_stack_free(pcre2_jit_stack *\fIjit_stack\fP); +. 
+.SH DESCRIPTION +.rs +.sp +This function is used to free a JIT stack that was created by +\fBpcre2_jit_stack_create()\fP when it is no longer needed. If the argument is +NULL, the function returns immediately without doing anything. For more +details, see the +.\" HREF +\fBpcre2jit\fP +.\" +page. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_maketables.3 b/3rd/pcre2/doc/pcre2_maketables.3 new file mode 100644 index 00000000..5910e8db --- /dev/null +++ b/3rd/pcre2/doc/pcre2_maketables.3 @@ -0,0 +1,36 @@ +.TH PCRE2_MAKETABLES 3 "28 July 2019" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +. +.SH DESCRIPTION +.rs +.sp +This function builds a set of character tables for character code points that +are less than 256. These can be passed to \fBpcre2_compile()\fP in a compile +context in order to override the internal, built-in tables (which were either +defaulted or made by \fBpcre2_maketables()\fP when PCRE2 was compiled). See the +.\" HREF +\fBpcre2_set_character_tables()\fP +.\" +page. You might want to do this if you are using a non-standard locale. +.P +If the argument is NULL, \fBmalloc()\fP is used to get memory for the tables. +Otherwise it must point to a general context, which can supply pointers to a +custom memory manager. The function yields a pointer to the tables. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_maketables_free.3 b/3rd/pcre2/doc/pcre2_maketables_free.3 new file mode 100644 index 00000000..95ee24fa --- /dev/null +++ b/3rd/pcre2/doc/pcre2_maketables_free.3 @@ -0,0 +1,31 @@ +.TH PCRE2_MAKETABLES_FREE 3 "03 September 2019" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function discards a set of character tables that were created by a call +to +.\" HREF +\fBpcre2_maketables()\fP. +.\" +.P +The \fIgcontext\fP parameter should match what was used in that call to +account for any custom allocators that might be in use; if it is NULL +the system \fBfree()\fP is used. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_match.3 b/3rd/pcre2/doc/pcre2_match.3 new file mode 100644 index 00000000..afa58ffd --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match.3 @@ -0,0 +1,86 @@ +.TH PCRE2_MATCH 3 "27 January 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It returns +offsets to what it has matched and to captured substrings via the +\fBmatch_data\fP block, which can be processed by functions with names that +start with \fBpcre2_get_ovector_...()\fP or \fBpcre2_substring_...()\fP. 
The +return from \fBpcre2_match()\fP is one more than the highest numbered capturing +pair that has been set (for example, 1 if there are no captures), zero if the +vector of offsets is too small, or a negative error code for no match and other +errors. The function arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fImatch_data\fP Points to a match data block, for results + \fImcontext\fP Points to a match context, or is NULL +.sp +A match context is needed only if you want to: +.sp + Set up a callout function + Set a matching offset limit + Change the heap memory limit + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management specifically for the match +.sp +The \fIlength\fP and \fIstartoffset\fP values are code units, not characters. +The length may be given as PCRE2_ZERO_TERMINATED for a subject that is +terminated by a binary zero code unit. 
The options are: +.sp + PCRE2_ANCHORED Match only at the first position + PCRE2_COPY_MATCHED_SUBJECT + On success, make a private subject copy + PCRE2_DISABLE_RECURSELOOP_CHECK + Only useful in rare cases; use with care + PCRE2_ENDANCHORED Pattern can match only at end of subject + PCRE2_NOTBOL Subject string is not the beginning of a line + PCRE2_NOTEOL Subject string is not the end of a line + PCRE2_NOTEMPTY An empty string is not a valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject + is not a valid match + PCRE2_NO_JIT Do not use JIT matching +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check the subject for UTF + validity (only relevant if PCRE2_UTF + was set at compile time) +.\" JOIN + PCRE2_PARTIAL_HARD Return PCRE2_ERROR_PARTIAL for a partial + match even if there is a full match +.\" JOIN + PCRE2_PARTIAL_SOFT Return PCRE2_ERROR_PARTIAL for a partial + match if no full matches are found +.sp +For details of partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +page. There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_match_context_copy.3 b/3rd/pcre2/doc/pcre2_match_context_copy.3 new file mode 100644 index 00000000..deaf7007 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match_context_copy.3 @@ -0,0 +1,29 @@ +.TH PCRE2_MATCH_CONTEXT_COPY 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_context *pcre2_match_context_copy( +.B " pcre2_match_context *\fImcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function makes a new copy of a match context, using the memory +allocation function that was used for the original context. The result is NULL +if the memory cannot be obtained. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_match_context_create.3 b/3rd/pcre2/doc/pcre2_match_context_create.3 new file mode 100644 index 00000000..5d638ba2 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match_context_create.3 @@ -0,0 +1,30 @@ +.TH PCRE2_MATCH_CONTEXT_CREATE 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_context *pcre2_match_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates and initializes a new match context. If its argument is +NULL, \fBmalloc()\fP is used to get the necessary memory; otherwise the memory +allocation function within the general context is used. The result is NULL if +the memory could not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_match_context_free.3 b/3rd/pcre2/doc/pcre2_match_context_free.3 new file mode 100644 index 00000000..1de4b111 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match_context_free.3 @@ -0,0 +1,29 @@ +.TH PCRE2_MATCH_CONTEXT_FREE 3 "28 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory occupied by a match context, using the memory +freeing function from the general context with which it was created, or +\fBfree()\fP if that was not set. If the argument is NULL, the function returns +immediately without doing anything. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_match_data_create.3 b/3rd/pcre2/doc/pcre2_match_data_create.3 new file mode 100644 index 00000000..0a6862f4 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match_data_create.3 @@ -0,0 +1,37 @@ +.TH PCRE2_MATCH_DATA_CREATE 3 "28 August 2021" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates a new match data block, which is used for holding the +result of a match. The first argument specifies the number of pairs of offsets +that are required. These form the "output vector" (ovector) within the match +data block, and are used to identify the matched string and any captured +substrings when matching with \fBpcre2_match()\fP, or a number of different +matches at the same point when used with \fBpcre2_dfa_match()\fP. There is +always one pair of offsets; if \fIovecsize\fP is zero, it is treated as one. +.P +The second argument points to a general context, for custom memory management, +or is NULL for system memory management. The result of the function is NULL if +the memory for the block could not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_match_data_create_from_pattern.3 b/3rd/pcre2/doc/pcre2_match_data_create_from_pattern.3 new file mode 100644 index 00000000..7cb2db86 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match_data_create_from_pattern.3 @@ -0,0 +1,40 @@ +.TH PCRE2_MATCH_DATA_CREATE_FROM_PATTERN 3 "19 August 2022" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function creates a new match data block for holding the result of a match. +The first argument points to a compiled pattern. The number of capturing +parentheses within the pattern is used to compute the number of pairs of +offsets that are required in the match data block. These form the "output +vector" (ovector) within the match data block, and are used to identify the +matched string and any captured substrings when matching with +\fBpcre2_match()\fP. If you are using \fBpcre2_dfa_match()\fP, which uses the +output vector in a different way, you should use \fBpcre2_match_data_create()\fP +instead of this function. +.P +The second argument points to a general context, for custom memory management, +or is NULL to use the same memory allocator as was used for the compiled +pattern. The result of the function is NULL if the memory for the block could +not be obtained. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_match_data_free.3 b/3rd/pcre2/doc/pcre2_match_data_free.3 new file mode 100644 index 00000000..c5ecc258 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_match_data_free.3 @@ -0,0 +1,35 @@ +.TH PCRE2_MATCH_DATA_FREE 3 "16 August 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +If \fImatch_data\fP is NULL, this function does nothing. Otherwise, +\fImatch_data\fP must point to a match data block, which this function frees, +using the memory freeing function from the general context or compiled pattern +with which it was created, or \fBfree()\fP if that was not set. If the match +data block was previously passed to \fBpcre2_match()\fP, it will have an +attached heapframe vector; this is also freed. +.P +If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this +match data block, the copy of the subject that was referenced within the block +is also freed. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_pattern_convert.3 b/3rd/pcre2/doc/pcre2_pattern_convert.3 new file mode 100644 index 00000000..0fe62609 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_pattern_convert.3 @@ -0,0 +1,55 @@ +.TH PCRE2_PATTERN_CONVERT 3 "12 July 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP," +.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. 
+It converts a foreign pattern (for example, a glob) into a PCRE2 regular +expression pattern. Its arguments are: +.sp + \fIpattern\fP The foreign pattern + \fIlength\fP The length of the input pattern or PCRE2_ZERO_TERMINATED + \fIoptions\fP Option bits + \fIbuffer\fP Pointer to pointer to output buffer, or NULL + \fIblength\fP Pointer to output length field + \fIcvcontext\fP Pointer to a convert context or NULL +.sp +The length of the converted pattern (excluding the terminating zero) is +returned via \fIblength\fP. If \fIbuffer\fP is NULL, the function just returns +the output length. If \fIbuffer\fP points to a NULL pointer, heap memory is +obtained for the converted pattern, using the allocator in the context if +present (or else \fBmalloc()\fP), and the field pointed to by \fIbuffer\fP is +updated. If \fIbuffer\fP points to a non-NULL field, that must point to a +buffer whose size is in the variable pointed to by \fIblength\fP. This value is +updated. +.P +The option bits are: +.sp + PCRE2_CONVERT_UTF Input is UTF + PCRE2_CONVERT_NO_UTF_CHECK Do not check UTF validity + PCRE2_CONVERT_POSIX_BASIC Convert POSIX basic pattern + PCRE2_CONVERT_POSIX_EXTENDED Convert POSIX extended pattern + PCRE2_CONVERT_GLOB ) Convert + PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR ) various types + PCRE2_CONVERT_GLOB_NO_STARSTAR ) of glob +.sp +The return value from \fBpcre2_pattern_convert()\fP is zero on success or a +non-zero PCRE2 error code. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. 
diff --git a/3rd/pcre2/doc/pcre2_pattern_info.3 b/3rd/pcre2/doc/pcre2_pattern_info.3 new file mode 100644 index 00000000..69ce3574 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_pattern_info.3 @@ -0,0 +1,108 @@ +.TH PCRE2_PATTERN_INFO 3 "14 February 2019" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, +.B " void *\fIwhere\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns information about a compiled pattern. Its arguments are: +.sp + \fIcode\fP Pointer to a compiled regular expression pattern + \fIwhat\fP What information is required + \fIwhere\fP Where to put the information +.sp +The recognized values for the \fIwhat\fP argument, and the information they +request are as follows: +.sp + PCRE2_INFO_ALLOPTIONS Final options after compiling + PCRE2_INFO_ARGOPTIONS Options passed to \fBpcre2_compile()\fP + PCRE2_INFO_BACKREFMAX Number of highest backreference + PCRE2_INFO_BSR What \eR matches: + PCRE2_BSR_UNICODE: Unicode line endings + PCRE2_BSR_ANYCRLF: CR, LF, or CRLF only + PCRE2_INFO_CAPTURECOUNT Number of capturing subpatterns +.\" JOIN + PCRE2_INFO_DEPTHLIMIT Backtracking depth limit if set, + otherwise PCRE2_ERROR_UNSET + PCRE2_INFO_EXTRAOPTIONS Extra options that were passed in the + compile context + PCRE2_INFO_FIRSTBITMAP Bitmap of first code units, or NULL + PCRE2_INFO_FIRSTCODETYPE Type of start-of-match information + 0 nothing set + 1 first code unit is set + 2 start of string or after newline + PCRE2_INFO_FIRSTCODEUNIT First code unit when type is 1 + PCRE2_INFO_FRAMESIZE Size of backtracking frame + PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \eC +.\" JOIN + PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches + exist in the pattern +.\" JOIN + PCRE2_INFO_HEAPLIMIT Heap memory limit if set, + otherwise PCRE2_ERROR_UNSET + PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was 
used + PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0 + PCRE2_INFO_LASTCODETYPE Type of must-be-present information + 0 nothing set + 1 code unit is set + PCRE2_INFO_LASTCODEUNIT Last code unit when type is 1 +.\" JOIN + PCRE2_INFO_MATCHEMPTY 1 if the pattern can match an + empty string, 0 otherwise +.\" JOIN + PCRE2_INFO_MATCHLIMIT Match limit if set, + otherwise PCRE2_ERROR_UNSET +.\" JOIN + PCRE2_INFO_MAXLOOKBEHIND Length (in characters) of the longest + lookbehind assertion + PCRE2_INFO_MINLENGTH Lower bound length of matching strings + PCRE2_INFO_NAMECOUNT Number of named subpatterns + PCRE2_INFO_NAMEENTRYSIZE Size of name table entries + PCRE2_INFO_NAMETABLE Pointer to name table + PCRE2_INFO_NEWLINE Code for the newline sequence: + PCRE2_NEWLINE_CR + PCRE2_NEWLINE_LF + PCRE2_NEWLINE_CRLF + PCRE2_NEWLINE_ANY + PCRE2_NEWLINE_ANYCRLF + PCRE2_NEWLINE_NUL + PCRE2_INFO_RECURSIONLIMIT Obsolete synonym for PCRE2_INFO_DEPTHLIMIT + PCRE2_INFO_SIZE Size of compiled pattern +.sp +If \fIwhere\fP is NULL, the function returns the amount of memory needed for +the requested information, in bytes. Otherwise, the \fIwhere\fP argument must +point to an unsigned 32-bit integer (uint32_t variable), except for the +following \fIwhat\fP values, when it must point to a variable of the type +shown: +.sp + PCRE2_INFO_FIRSTBITMAP const uint8_t * + PCRE2_INFO_JITSIZE size_t + PCRE2_INFO_NAMETABLE PCRE2_SPTR + PCRE2_INFO_SIZE size_t +.sp +The yield of the function is zero on success or: +.sp + PCRE2_ERROR_NULL the argument \fIcode\fP is NULL + PCRE2_ERROR_BADMAGIC the "magic number" was not found + PCRE2_ERROR_BADOPTION the value of \fIwhat\fP is invalid + PCRE2_ERROR_BADMODE the pattern was compiled in the wrong mode + PCRE2_ERROR_UNSET the requested information is not set +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_serialize_decode.3 b/3rd/pcre2/doc/pcre2_serialize_decode.3 new file mode 100644 index 00000000..6a3f30d6 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_serialize_decode.3 @@ -0,0 +1,53 @@ +.TH PCRE2_SERIALIZE_DECODE 3 "22 April 2022" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function decodes a serialized set of compiled patterns back into a list of +individual patterns. This is possible only on a host that is running the same +version of PCRE2, with the same code unit width, and the host must also have +the same endianness, pointer width and PCRE2_SIZE type. The arguments for +\fBpcre2_serialize_decode()\fP are: +.sp + \fIcodes\fP pointer to a vector in which to build the list + \fInumber_of_codes\fP number of slots in the vector + \fIbytes\fP the serialized byte stream + \fIgcontext\fP pointer to a general context or NULL +.sp +The \fIbytes\fP argument must point to a block of data that was originally +created by \fBpcre2_serialize_encode()\fP, though it may have been saved on +disc or elsewhere in the meantime. If there are more codes in the serialized +data than slots in the list, only those compiled patterns that will fit are +decoded. 
The yield of the function is the number of decoded patterns, or one of +the following negative error codes: +.sp + PCRE2_ERROR_BADDATA \fInumber_of_codes\fP is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in \fIbytes\fP + PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL \fIcodes\fP or \fIbytes\fP is NULL +.sp +PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_serialize_encode.3 b/3rd/pcre2/doc/pcre2_serialize_encode.3 new file mode 100644 index 00000000..f682a54a --- /dev/null +++ b/3rd/pcre2/doc/pcre2_serialize_encode.3 @@ -0,0 +1,54 @@ +.TH PCRE2_SERIALIZE_ENCODE 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," +.B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function encodes a list of compiled patterns into a byte stream that can +be saved on disc or elsewhere. Note that this is not an abstract format like +Java or .NET. Conversion of the byte stream back into usable compiled patterns +can only happen on a host that is running the same version of PCRE2, with the +same code unit width, and the host must also have the same endianness, pointer +width and PCRE2_SIZE type. 
The arguments for \fBpcre2_serialize_encode()\fP +are: +.sp + \fIcodes\fP pointer to a vector containing the list + \fInumber_of_codes\fP number of slots in the vector + \fIserialized_bytes\fP set to point to the serialized byte stream + \fIserialized_size\fP set to the number of bytes in the byte stream + \fIgcontext\fP pointer to a general context or NULL +.sp +The context argument is used to obtain memory for the byte stream. When the +serialized data is no longer needed, it must be freed by calling +\fBpcre2_serialize_free()\fP. The yield of the function is the number of +serialized patterns, or one of the following negative error codes: +.sp + PCRE2_ERROR_BADDATA \fInumber_of_codes\fP is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL an argument other than \fIgcontext\fP is NULL +.sp +PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_serialize_free.3 b/3rd/pcre2/doc/pcre2_serialize_free.3 new file mode 100644 index 00000000..73bfa4e9 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_serialize_free.3 @@ -0,0 +1,29 @@ +.TH PCRE2_SERIALIZE_FREE 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B void pcre2_serialize_free(uint8_t *\fIbytes\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This function frees the memory that was obtained by +\fBpcre2_serialize_encode()\fP to hold a serialized byte stream. 
The argument +must point to such a byte stream or be NULL, in which case the function returns +without doing anything. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_serialize_get_number_of_codes.3 b/3rd/pcre2/doc/pcre2_serialize_get_number_of_codes.3 new file mode 100644 index 00000000..9c4ebf72 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_serialize_get_number_of_codes.3 @@ -0,0 +1,37 @@ +.TH PCRE2_SERIALIZE_GET_NUMBER_OF_CODES 3 "13 August 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int32_t pcre2_serialize_get_number_of_codes(const uint8_t *\fIbytes\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +The \fIbytes\fP argument must point to a serialized byte stream that was +originally created by \fBpcre2_serialize_encode()\fP (though it may have been +saved on disc or elsewhere in the meantime). The function returns the number of +serialized patterns in the byte stream, or one of the following negative error +codes: +.sp + PCRE2_ERROR_BADMAGIC mismatch of id bytes in \fIbytes\fP + PCRE2_ERROR_BADMODE mismatch of variable unit size or PCRE version + PCRE2_ERROR_NULL the argument is NULL +.sp +PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the serialization functions in the +.\" HREF +\fBpcre2serialize\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_set_bsr.3 b/3rd/pcre2/doc/pcre2_set_bsr.3 new file mode 100644 index 00000000..fb4018a7 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_bsr.3 @@ -0,0 +1,30 @@ +.TH PCRE2_SET_BSR 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the convention for processing \eR within a compile context. +The second argument must be one of PCRE2_BSR_ANYCRLF or PCRE2_BSR_UNICODE. The +result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_callout.3 b/3rd/pcre2/doc/pcre2_set_callout.3 new file mode 100644 index 00000000..d973cfc2 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_callout.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_CALLOUT 3 "25 March 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_callout_block *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the callout fields in a match context (the first argument). +The second argument specifies a callout function, and the third argument is an +opaque data item that is passed to it. The result of this function is always +zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_set_character_tables.3 b/3rd/pcre2/doc/pcre2_set_character_tables.3 new file mode 100644 index 00000000..690191b0 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_character_tables.3 @@ -0,0 +1,35 @@ +.TH PCRE2_SET_CHARACTER_TABLES 3 "15 April 2020" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets a pointer to custom character tables within a compile +context. The second argument must point to a set of PCRE2 character tables or +be NULL to request the default tables. The result is always zero. Character +tables can be created by calling \fBpcre2_maketables()\fP or by running the +\fBpcre2_dftables\fP maintenance command in binary mode (see the +.\" HREF +\fBpcre2build\fP +.\" +documentation). +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_compile_extra_options.3 b/3rd/pcre2/doc/pcre2_set_compile_extra_options.3 new file mode 100644 index 00000000..703b541b --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_compile_extra_options.3 @@ -0,0 +1,59 @@ +.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "14 October 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIextra_options\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets additional option bits for \fBpcre2_compile()\fP that are +housed in a compile context. It completely replaces all the bits. 
The extra +options are: +.sp + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \eK in lookarounds +.\" JOIN + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{d800} to \ex{dfff} + in UTF-8 and UTF-32 modes +.\" JOIN + PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and + \ex handling + PCRE2_EXTRA_ASCII_BSD \ed remains ASCII in UCP mode + PCRE2_EXTRA_ASCII_BSS \es remains ASCII in UCP mode + PCRE2_EXTRA_ASCII_BSW \ew remains ASCII in UCP mode +.\" JOIN + PCRE2_EXTRA_ASCII_DIGIT [:digit:] and [:xdigit:] POSIX classes + remain ASCII in UCP mode +.\" JOIN + PCRE2_EXTRA_ASCII_POSIX POSIX classes remain ASCII in + UCP mode +.\" JOIN + PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as + a literal following character +.\" JOIN + PCRE2_EXTRA_CASELESS_RESTRICT Disable mixed ASCII/non-ASCII + case folding + PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \er as \en + PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines + PCRE2_EXTRA_MATCH_WORD Pattern matches "words" + PCRE2_EXTRA_NEVER_CALLOUT Disallow callouts in pattern + PCRE2_EXTRA_NO_BS0 Disallow \e0 (but not \e00 or \e000) + PCRE2_EXTRA_PYTHON_OCTAL Use Python rules for octal + PCRE2_EXTRA_TURKISH_CASING Use Turkish I case folding +.sp +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_compile_recursion_guard.3 b/3rd/pcre2/doc/pcre2_set_compile_recursion_guard.3 new file mode 100644 index 00000000..ac9baca1 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_compile_recursion_guard.3 @@ -0,0 +1,34 @@ +.TH PCRE2_SET_COMPILE_RECURSION_GUARD 3 "26 November 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, +.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function defines, within a compile context, a function that is called +whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a +pattern. The first argument to the function gives the current depth of +parenthesis nesting, and the second is user data that is supplied when the +function is set up. The callout function should return zero if all is well, or +non-zero to force an error. This feature is provided so that applications can +check the available system stack space, in order to avoid running out. The +result of \fBpcre2_set_compile_recursion_guard()\fP is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_depth_limit.3 b/3rd/pcre2/doc/pcre2_set_depth_limit.3 new file mode 100644 index 00000000..74b4d131 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_depth_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_DEPTH_LIMIT 3 "25 March 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_depth_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the backtracking depth limit field in a match context. The +result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_set_glob_escape.3 b/3rd/pcre2/doc/pcre2_set_glob_escape.3 new file mode 100644 index 00000000..a8cfac3f --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_glob_escape.3 @@ -0,0 +1,29 @@ +.TH PCRE2_SET_GLOB_ESCAPE 3 "12 July 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIescape_char\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It sets the escape character that is used when converting globs. The second +argument must either be zero (meaning there is no escape character) or a +punctuation character whose code point is less than 256. The default is grave +accent if running under Windows, otherwise backslash. The result of the +function is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/3rd/pcre2/doc/pcre2_set_glob_separator.3 b/3rd/pcre2/doc/pcre2_set_glob_separator.3 new file mode 100644 index 00000000..0016c5f0 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_glob_separator.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_GLOB_SEPARATOR 3 "17 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIseparator_char\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is part of an experimental set of pattern conversion functions. +It sets the component separator character that is used when converting globs. +The second argument must be one of the characters forward slash, backslash, or +dot. The default is backslash when running under Windows, otherwise forward +slash. 
The result of the function is zero for success or PCRE2_ERROR_BADDATA if +the second argument is invalid. +.P +The pattern conversion functions are described in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. diff --git a/3rd/pcre2/doc/pcre2_set_heap_limit.3 b/3rd/pcre2/doc/pcre2_set_heap_limit.3 new file mode 100644 index 00000000..61c9349c --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_heap_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_HEAP_LIMIT 3 "17 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the backtracking heap limit field in a match context. The +result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_match_limit.3 b/3rd/pcre2/doc/pcre2_set_match_limit.3 new file mode 100644 index 00000000..615d8174 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_match_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_MATCH_LIMIT 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the match limit field in a match context. The result is +always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_set_max_pattern_compiled_length.3 b/3rd/pcre2/doc/pcre2_set_max_pattern_compiled_length.3 new file mode 100644 index 00000000..edb9460a --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_max_pattern_compiled_length.3 @@ -0,0 +1,32 @@ +.TH PCRE2_SET_MAX_PATTERN_COMPILED_LENGTH 3 "09 June 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_pattern_compiled_length( +.B " pcre2_compile_context *\fIccontext\fP, PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum size (in bytes) for the +memory needed to hold the compiled version of a pattern that is using this +context. The result is always zero. If a pattern that is passed to +\fBpcre2_compile()\fP referencing this context needs more memory, an error is +generated. The default is the largest number that a PCRE2_SIZE variable can +hold, which is effectively unlimited. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_max_pattern_length.3 b/3rd/pcre2/doc/pcre2_set_max_pattern_length.3 new file mode 100644 index 00000000..fa8357ec --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_max_pattern_length.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 October 2016" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum text length (in code +units) of the pattern that can be compiled. The result is always zero. 
If a +longer pattern is passed to \fBpcre2_compile()\fP there is an immediate error +return. The default is effectively unlimited, being the largest value a +PCRE2_SIZE variable can hold. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_max_varlookbehind.3 b/3rd/pcre2/doc/pcre2_set_max_varlookbehind.3 new file mode 100644 index 00000000..b54426ff --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_max_varlookbehind.3 @@ -0,0 +1,30 @@ +.TH PCRE2_SET_MAX_VARLOOKBEHIND 3 "11 August 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. The result is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_newline.3 b/3rd/pcre2/doc/pcre2_set_newline.3 new file mode 100644 index 00000000..1e74bb46 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_newline.3 @@ -0,0 +1,39 @@ +.TH PCRE2_SET_NEWLINE 3 "19 July 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the newline convention within a compile context. 
This +specifies which character(s) are recognized as newlines when compiling and +matching patterns. The second argument must be one of: +.sp + PCRE2_NEWLINE_CR Carriage return only + PCRE2_NEWLINE_LF Linefeed only + PCRE2_NEWLINE_CRLF CR followed by LF only + PCRE2_NEWLINE_ANYCRLF Any of the above + PCRE2_NEWLINE_ANY Any Unicode newline sequence + PCRE2_NEWLINE_NUL The NUL character (binary zero) +.sp +The result is zero for success or PCRE2_ERROR_BADDATA if the second argument is +invalid. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_offset_limit.3 b/3rd/pcre2/doc/pcre2_set_offset_limit.3 new file mode 100644 index 00000000..72de1f0e --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_offset_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_OFFSET_LIMIT 3 "22 September 2015" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the offset limit field in a match context. The result is +always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_optimize.3 b/3rd/pcre2/doc/pcre2_set_optimize.3 new file mode 100644 index 00000000..9a17baa5 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_optimize.3 @@ -0,0 +1,42 @@ +.TH PCRE2_SET_OPTIMIZE 3 "22 September 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_optimize(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIdirective\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function controls which performance optimizations will be applied +by \fBpcre2_compile()\fP. It can be called multiple times with the same compile +context; the effects are cumulative, with the effects of later calls taking +precedence over earlier ones. +.P +The result is zero for success, PCRE2_ERROR_NULL if \fIccontext\fP is NULL, +or PCRE2_ERROR_BADOPTION if \fIdirective\fP is unknown. The latter could be +useful to detect if a certain optimization is available. +.P +The possible values for the \fIdirective\fP parameter are: +.sp + PCRE2_OPTIMIZATION_FULL Enable all optimizations (default) + PCRE2_OPTIMIZATION_NONE Disable all optimizations + PCRE2_AUTO_POSSESS Enable auto-possessification + PCRE2_AUTO_POSSESS_OFF Disable auto-possessification + PCRE2_DOTSTAR_ANCHOR Enable implicit dotstar anchoring + PCRE2_DOTSTAR_ANCHOR_OFF Disable implicit dotstar anchoring + PCRE2_START_OPTIMIZE Enable start-up optimizations at match time + PCRE2_START_OPTIMIZE_OFF Disable start-up optimizations at match time +.sp +There is a complete description of the PCRE2 native API, including detailed +descriptions of the \fIdirective\fP parameter values, in the +.\" HREF +\fBpcre2api\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_parens_nest_limit.3 b/3rd/pcre2/doc/pcre2_set_parens_nest_limit.3 new file mode 100644 index 00000000..b74b4166 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_parens_nest_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_PARENS_NEST_LIMIT 3 "25 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets, in a compile context, the maximum depth of nested +parentheses in a pattern. The result is always zero. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_recursion_limit.3 b/3rd/pcre2/doc/pcre2_set_recursion_limit.3 new file mode 100644 index 00000000..e95d1f78 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_recursion_limit.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SET_RECURSION_LIMIT 3 "19 July 2017" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_recursion_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function is obsolete and should not be used in new code. Use +\fBpcre2_set_depth_limit()\fP instead. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_recursion_memory_management.3 b/3rd/pcre2/doc/pcre2_set_recursion_memory_management.3 new file mode 100644 index 00000000..0ae39dc8 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_recursion_memory_management.3 @@ -0,0 +1,30 @@ +.TH PCRE2_SET_RECURSION_MEMORY_MANAGEMENT 3 "23 January 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_recursion_memory_management( +.B " pcre2_match_context *\fImcontext\fP," +.B " void *(*\fIprivate_malloc\fP)(size_t, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +From release 10.30 onwards, this function is obsolete and does nothing. The +result is always zero. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_substitute_callout.3 b/3rd/pcre2/doc/pcre2_set_substitute_callout.3 new file mode 100644 index 00000000..628a2f64 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_substitute_callout.3 @@ -0,0 +1,31 @@ +.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "04 October 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function sets the substitute callout fields in a match context (the first +argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_set_substitute_case_callout.3 b/3rd/pcre2/doc/pcre2_set_substitute_case_callout.3 new file mode 100644 index 00000000..660fa9c1 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_set_substitute_case_callout.3 @@ -0,0 +1,33 @@ +.TH PCRE2_SET_SUBSTITUTE_CASE_CALLOUT 3 "26 December 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.fi +. 
+.SH DESCRIPTION +.rs +.sp +This function sets the substitute case callout fields in a match context (the +first argument). The second argument specifies a callout function, and the third +argument is an opaque data item that is passed to it. The result of this +function is always zero. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substitute.3 b/3rd/pcre2/doc/pcre2_substitute.3 new file mode 100644 index 00000000..b4cd49a9 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substitute.3 @@ -0,0 +1,110 @@ +.TH PCRE2_SUBSTITUTE 3 "27 November 2021" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," +.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP," +.B " PCRE2_SIZE *\fIoutlengthptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function matches a compiled regular expression against a given subject +string, using a matching algorithm that is similar to Perl's. It then makes a +copy of the subject, substituting a replacement string for what was matched. 
+Its arguments are: +.sp + \fIcode\fP Points to the compiled pattern + \fIsubject\fP Points to the subject string + \fIlength\fP Length of the subject string + \fIstartoffset\fP Offset in the subject at which to start matching + \fIoptions\fP Option bits + \fImatch_data\fP Points to a match data block, or is NULL + \fImcontext\fP Points to a match context, or is NULL + \fIreplacement\fP Points to the replacement string + \fIrlength\fP Length of the replacement string + \fIoutputbuffer\fP Points to the output buffer + \fIoutlengthptr\fP Points to the length of the output buffer +.sp +A match data block is needed only if you want to inspect the data from the +final match that is returned in that block or if PCRE2_SUBSTITUTE_MATCHED is +set. A match context is needed only if you want to: +.sp + Set up a callout function + Set a matching offset limit + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management in the match context +.sp +The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code units, +not characters, as is the contents of the variable pointed at by +\fIoutlengthptr\fP. This variable must contain the length of the output buffer +when the function is called. If the function is successful, the value is +changed to the length of the new string, excluding the trailing zero that is +automatically added. +.P +The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for +zero-terminated strings. 
The options are: +.sp + PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Match only at end of subject +.\" JOIN + PCRE2_NOTBOL Subject is not the beginning of a + line + PCRE2_NOTEOL Subject is not the end of a line +.\" JOIN + PCRE2_NOTEMPTY An empty string is not a + valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of + the subject is not a valid match + PCRE2_NO_JIT Do not use JIT matching +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check for UTF validity in + the subject or replacement +.\" JOIN + (only relevant if PCRE2_UTF was + set at compile time) + PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing +.\" JOIN + PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the + subject + PCRE2_SUBSTITUTE_LITERAL The replacement string is literal +.\" JOIN + PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for + first match + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length + PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s) + PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset + PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string +.sp +If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, +PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. +.P +If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its +contents must be the result of a call to \fBpcre2_match()\fP using the same +pattern and subject. +.P +The function returns the number of substitutions, which may be zero if there +are no matches. The result may be greater than one only when +PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code +is returned. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_substring_copy_byname.3 b/3rd/pcre2/doc/pcre2_substring_copy_byname.3 new file mode 100644 index 00000000..de8cc10d --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_copy_byname.3 @@ -0,0 +1,46 @@ +.TH PCRE2_SUBSTRING_COPY_BYNAME 3 "19 December 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_copy_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR *\fIbuffer\fP, PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring, identified +by name, into a given buffer. The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fIname\fP Name of the required substring + \fIbuffer\fP Buffer to receive the string + \fIbufflen\fP Length of buffer (code units) +.sp +The \fIbufflen\fP variable is updated to contain the length of the extracted +string, excluding the trailing zero. The yield of the function is zero for +success or one of the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that name + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY the buffer is not big enough +.sp +If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_substring_copy_bynumber.3 b/3rd/pcre2/doc/pcre2_substring_copy_bynumber.3 new file mode 100644 index 00000000..ec7428b4 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_copy_bynumber.3 @@ -0,0 +1,44 @@ +.TH PCRE2_SUBSTRING_COPY_BYNUMBER 3 "13 December 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_copy_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR *\fIbuffer\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring into a given +buffer. The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fInumber\fP Number of the required substring + \fIbuffer\fP Buffer to receive the string + \fIbufflen\fP Length of buffer +.sp +The \fIbufflen\fP variable is updated with the length of the extracted string, +excluding the terminating zero. The yield of the function is zero for success +or one of the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that number + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY the buffer is too small +.sp +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_free.3 b/3rd/pcre2/doc/pcre2_substring_free.3 new file mode 100644 index 00000000..12b65044 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_free.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SUBSTRING_FREE 3 "28 June 2018" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +. 
+.SH DESCRIPTION +.rs +.sp +This is a convenience function for freeing the memory obtained by a previous +call to \fBpcre2_substring_get_byname()\fP or +\fBpcre2_substring_get_bynumber()\fP. Its only argument is a pointer to the +string. If the argument is NULL, the function does nothing. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_get_byname.3 b/3rd/pcre2/doc/pcre2_substring_get_byname.3 new file mode 100644 index 00000000..9b14681b --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_get_byname.3 @@ -0,0 +1,48 @@ +.TH PCRE2_SUBSTRING_GET_BYNAME 3 "19 December 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_get_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring by name into +newly acquired memory. The arguments are: +.sp + \fImatch_data\fP The match data for the match + \fIname\fP Name of the required substring + \fIbufferptr\fP Where to put the string pointer + \fIbufflen\fP Where to put the string length +.sp +The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function \fBpcre2_substring_free()\fP can be used to free it when +it is no longer needed. 
The yield of the function is zero for success or one of +the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that name + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY memory could not be obtained +.sp +If there is more than one group with the given name, the first one that is set +is returned. In this situation PCRE2_ERROR_UNSET means that no group with the +given name was set. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_get_bynumber.3 b/3rd/pcre2/doc/pcre2_substring_get_bynumber.3 new file mode 100644 index 00000000..b388a099 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_get_bynumber.3 @@ -0,0 +1,45 @@ +.TH PCRE2_SUBSTRING_GET_BYNUMBER 3 "13 December 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_get_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting a captured substring by number +into newly acquired memory. The arguments are: +.sp + \fImatch_data\fP The match data for the match + \fInumber\fP Number of the required substring + \fIbufferptr\fP Where to put the string pointer + \fIbufflen\fP Where to put the string length +.sp +The memory in which the substring is placed is obtained by calling the same +memory allocation function that was used for the match data block. The +convenience function \fBpcre2_substring_free()\fP can be used to free it when +it is no longer needed. 
The yield of the function is zero for success or one of +the following error numbers: +.sp + PCRE2_ERROR_NOSUBSTRING there are no groups of that number + PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group + PCRE2_ERROR_UNSET the group did not participate in the match + PCRE2_ERROR_NOMEMORY memory could not be obtained +.sp +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_length_byname.3 b/3rd/pcre2/doc/pcre2_substring_length_byname.3 new file mode 100644 index 00000000..ce075f02 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_length_byname.3 @@ -0,0 +1,34 @@ +.TH PCRE2_SUBSTRING_LENGTH_BYNAME 3 "21 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_length_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SIZE *\fIlength\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the length of a matched substring, identified by name. +The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fIname\fP The substring name + \fIlength\fP Where to return the length +.sp +The yield is zero on success, or an error code if the substring is not found. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_substring_length_bynumber.3 b/3rd/pcre2/doc/pcre2_substring_length_bynumber.3 new file mode 100644 index 00000000..eb10fce3 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_length_bynumber.3 @@ -0,0 +1,36 @@ +.TH PCRE2_SUBSTRING_LENGTH_BYNUMBER 3 "22 December 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_substring_length_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_SIZE *\fIlength\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This function returns the length of a matched substring, identified by number. +The arguments are: +.sp + \fImatch_data\fP The match data block for the match + \fInumber\fP The substring number + \fIlength\fP Where to return the length, or NULL +.sp +The third argument may be NULL if all you want to know is whether or not a +substring is set. The yield is zero on success, or a negative error code +otherwise. After a partial match, only substring 0 is available. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_list_free.3 b/3rd/pcre2/doc/pcre2_substring_list_free.3 new file mode 100644 index 00000000..6f0ec8e1 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_list_free.3 @@ -0,0 +1,28 @@ +.TH PCRE2_SUBSTRING_LIST_FREE 3 "02 December 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include +.PP +.SM +.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP); +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for freeing the store obtained by a previous +call to \fBpcre2_substring_list_get()\fP. Its only argument is a pointer to +the list of string pointers. 
If the argument is NULL, the function returns +immediately, without doing anything. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_list_get.3 b/3rd/pcre2/doc/pcre2_substring_list_get.3 new file mode 100644 index 00000000..97f25258 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_list_get.3 @@ -0,0 +1,44 @@ +.TH PCRE2_SUBSTRING_LIST_GET 3 "21 October 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This is a convenience function for extracting all the captured substrings after +a pattern match. It builds a list of pointers to the strings, and (optionally) +a second list that contains their lengths (in code units), excluding a +terminating zero that is added to each of them. All this is done in a single +block of memory that is obtained using the same memory allocation function that +was used to get the match data block. The convenience function +\fBpcre2_substring_list_free()\fP can be used to free it when it is no longer +needed. The arguments are: +.sp + \fImatch_data\fP The match data block + \fIlistptr\fP Where to put a pointer to the list + \fIlengthsptr\fP Where to put a pointer to the lengths, or NULL +.sp +A pointer to a list of pointers is put in the variable whose address is in +\fIlistptr\fP. The list is terminated by a NULL pointer. If \fIlengthsptr\fP is +not NULL, a matching list of lengths is created, and its address is placed in +\fIlengthsptr\fP. The yield of the function is zero on success or +PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained. 
+.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2_substring_nametable_scan.3 b/3rd/pcre2/doc/pcre2_substring_nametable_scan.3 new file mode 100644 index 00000000..0d9b611c --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_nametable_scan.3 @@ -0,0 +1,41 @@ +.TH PCRE2_SUBSTRING_NAMETABLE_SCAN 3 "06 February 2019" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_nametable_scan(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This convenience function finds, for a compiled pattern, the first and last +entries for a given name in the table that translates capture group names into +numbers. +.sp + \fIcode\fP Compiled regular expression + \fIname\fP Name whose entries are required + \fIfirst\fP Where to return a pointer to the first entry + \fIlast\fP Where to return a pointer to the last entry +.sp +When the name is found in the table, if \fIfirst\fP is NULL, the function +returns a group number, but if there is more than one matching entry, it is not +defined which one. Otherwise, when both pointers have been set, the yield of +the function is the length of each entry in code units. If the name is not +found, PCRE2_ERROR_NOSUBSTRING is returned. +.P +There is a complete description of the PCRE2 native API, including the format of +the table entries, in the +.\" HREF +\fBpcre2api\fP +.\" +page, and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. 
diff --git a/3rd/pcre2/doc/pcre2_substring_number_from_name.3 b/3rd/pcre2/doc/pcre2_substring_number_from_name.3 new file mode 100644 index 00000000..8c26f113 --- /dev/null +++ b/3rd/pcre2/doc/pcre2_substring_number_from_name.3 @@ -0,0 +1,38 @@ +.TH PCRE2_SUBSTRING_NUMBER_FROM_NAME 3 "03 November 2014" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.nf +.B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +This convenience function finds the number of a named substring capturing +parenthesis in a compiled pattern, provided that it is a unique name. The +function arguments are: +.sp + \fIcode\fP Compiled regular expression + \fIname\fP Name whose number is required +.sp +The yield of the function is the number of the parenthesis if the name is +found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are +allowed (PCRE2_DUPNAMES is set), if the name is not unique, +PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers +with the same name by calling \fBpcre2_substring_nametable_scan()\fP. +.P +There is a complete description of the PCRE2 native API in the +.\" HREF +\fBpcre2api\fP +.\" +page and a description of the POSIX API in the +.\" HREF +\fBpcre2posix\fP +.\" +page. diff --git a/3rd/pcre2/doc/pcre2api.3 b/3rd/pcre2/doc/pcre2api.3 new file mode 100644 index 00000000..c3a79689 --- /dev/null +++ b/3rd/pcre2/doc/pcre2api.3 @@ -0,0 +1,4482 @@ +.TH PCRE2API 3 "26 December 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.sp +.B #include <pcre2.h> +.sp +PCRE2 is a new API for PCRE, starting at release 10.0. This document contains a +description of all its native functions. See the +.\" HREF +\fBpcre2\fP +.\" +document for an overview of all the PCRE2 documentation. +. +. 
+.SH "PCRE2 NATIVE API BASIC FUNCTIONS" +.rs +.sp +.nf +.B pcre2_code *pcre2_compile(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.sp +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B int pcre2_dfa_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP," +.B " int *\fIworkspace\fP, PCRE2_SIZE \fIwscount\fP);" +.sp +.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); +.fi +. +. +.SH "PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS" +.rs +.sp +.nf +.B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_match_data_heapframes_size( +.B " pcre2_match_data *\fImatch_data\fP);" +.sp +.B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); +.fi +. +. 
+.SH "PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS" +.rs +.sp +.nf +.B pcre2_general_context *pcre2_general_context_create( +.B " void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.sp +.B pcre2_general_context *pcre2_general_context_copy( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP); +.fi +. +. +.SH "PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS" +.rs +.sp +.nf +.B pcre2_compile_context *pcre2_compile_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_compile_context *pcre2_compile_context_copy( +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); +.sp +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, +.B " const uint8_t *\fItables\fP);" +.sp +.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIextra_options\fP);" +.sp +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.sp +.B int pcre2_set_max_pattern_compiled_length( +.B " pcre2_compile_context *\fIccontext\fP, PCRE2_SIZE \fIvalue\fP);" +.sp +.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, +.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.sp +.B int pcre2_set_optimize(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIdirective\fP);" +.fi +. +. 
+.SH "PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS" +.rs +.sp +.nf +.B pcre2_match_context *pcre2_match_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_context *pcre2_match_context_copy( +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); +.sp +.B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.sp +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.sp +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.sp +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.sp +.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_depth_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +. +. 
+.SH "PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS" +.rs +.sp +.nf +.B int pcre2_substring_copy_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR *\fIbuffer\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_copy_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR *\fIbuffer\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +.sp +.B int pcre2_substring_get_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_get_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR **\fIbufferptr\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_length_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_length_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_nametable_scan(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);" +.sp +.B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP);" +.sp +.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP); +.sp +.B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP);" +.fi +. +. 
+.SH "PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION" +.rs +.sp +.nf +.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," +.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP," +.B " PCRE2_SIZE *\fIoutlengthptr\fP);" +.fi +. +. +.SH "PCRE2 NATIVE API JIT FUNCTIONS" +.rs +.sp +.nf +.B int pcre2_jit_compile(pcre2_code *\fIcode\fP, uint32_t \fIoptions\fP); +.sp +.B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.sp +.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP, +.B " size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, +.B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" +.sp +.B void pcre2_jit_stack_free(pcre2_jit_stack *\fIjit_stack\fP); +.fi +. +. +.SH "PCRE2 NATIVE API SERIALIZATION FUNCTIONS" +.rs +.sp +.nf +.B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," +.B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_serialize_free(uint8_t *\fIbytes\fP); +.sp +.B int32_t pcre2_serialize_get_number_of_codes(const uint8_t *\fIbytes\fP); +.fi +. +. 
+.SH "PCRE2 NATIVE API AUXILIARY FUNCTIONS" +.rs +.sp +.nf +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.sp +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.sp +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +.sp +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.sp +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, +.B " void *\fIwhere\fP);" +.sp +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.sp +.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP); +.fi +. +. +.SH "PCRE2 NATIVE API OBSOLETE FUNCTIONS" +.rs +.sp +.nf +.B int pcre2_set_recursion_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.sp +.B int pcre2_set_recursion_memory_management( +.B " pcre2_match_context *\fImcontext\fP," +.B " void *(*\fIprivate_malloc\fP)(size_t, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +.sp +These functions became obsolete at release 10.30 and are retained only for +backward compatibility. They should not be used in new code. The first is +replaced by \fBpcre2_set_depth_limit()\fP; the second is no longer needed and +has no effect (it always returns zero). +. +. 
+.SH "PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS" +.rs +.sp +.nf +.B pcre2_convert_context *pcre2_convert_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_convert_context *pcre2_convert_context_copy( +.B " pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP); +.sp +.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIescape_char\fP);" +.sp +.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIseparator_char\fP);" +.sp +.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP," +.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP); +.fi +.sp +These functions provide a way of converting non-PCRE2 patterns into +patterns that can be processed by \fBpcre2_compile()\fP. This facility is +experimental and may be changed in future releases. At present, "globs" and +POSIX basic and extended patterns can be converted. Details are given in the +.\" HREF +\fBpcre2convert\fP +.\" +documentation. +. +. +.SH "PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES" +.rs +.sp +There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code +units, respectively. However, there is just one header file, \fBpcre2.h\fP. +This contains the function prototypes and other definitions for all three +libraries. One, two, or all three can be installed simultaneously. On Unix-like +systems the libraries are called \fBlibpcre2-8\fP, \fBlibpcre2-16\fP, and +\fBlibpcre2-32\fP, and they can also co-exist with the original PCRE libraries. 
+Every PCRE2 function comes in three different forms, one for each library, for +example: +.sp + \fBpcre2_compile_8()\fP + \fBpcre2_compile_16()\fP + \fBpcre2_compile_32()\fP +.sp +There are also three different sets of data types: +.sp + \fBPCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32\fP + \fBPCRE2_SPTR8, PCRE2_SPTR16, PCRE2_SPTR32\fP +.sp +The UCHAR types define unsigned code units of the appropriate widths. +For example, PCRE2_UCHAR16 is usually defined as `uint16_t'. +The SPTR types are pointers to constants of the equivalent UCHAR types, +that is, they are pointers to vectors of unsigned code units. +.P +Character strings are passed to a PCRE2 library as sequences of unsigned +integers in code units of the appropriate width. The length of a string may +be given as a number of code units, or the string may be specified as +zero-terminated. +.P +Many applications use only one code unit width. For their convenience, macros +are defined whose names are the generic forms such as \fBpcre2_compile()\fP and +PCRE2_SPTR. These macros use the value of the macro PCRE2_CODE_UNIT_WIDTH to +generate the appropriate width-specific function and macro names. +PCRE2_CODE_UNIT_WIDTH is not defined by default. An application must define it +to be 8, 16, or 32 before including \fBpcre2.h\fP in order to make use of the +generic names. +.P +Applications that use more than one code unit width can be linked with more +than one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to be 0 before +including \fBpcre2.h\fP, and then use the real function names. Any code that is +to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is +unknown should also use the real function names. (Unfortunately, it is not +possible in C code to save and restore the value of a macro.) +.P +If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a +compiler error occurs. 
+.P +When using multiple libraries in an application, you must take care when +processing any particular pattern to use only functions from a single library. +For example, if you want to run a match using a pattern that was compiled with +\fBpcre2_compile_16()\fP, you must do so with \fBpcre2_match_16()\fP, not +\fBpcre2_match_8()\fP or \fBpcre2_match_32()\fP. +.P +In the function summaries above, and in the rest of this document and other +PCRE2 documents, functions and data types are described using their generic +names, without the _8, _16, or _32 suffix. +. +. +.SH "PCRE2 API OVERVIEW" +.rs +.sp +PCRE2 has its own native API, which is described in this document. There are +also some wrapper functions for the 8-bit library that correspond to the +POSIX regular expression API, but they do not give access to all the +functionality of PCRE2 and they are not thread-safe. They are described in the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. Both these APIs define a set of C function calls. +.P +The native API C data types, function prototypes, option values, and error +codes are defined in the header file \fBpcre2.h\fP, which also contains +definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers +for the library. Applications can use these to include support for different +releases of PCRE2. +.P +In a Windows environment, if you want to statically link an application program +against a non-dll PCRE2 library, you must define PCRE2_STATIC before including +\fBpcre2.h\fP. +.P +The functions \fBpcre2_compile()\fP and \fBpcre2_match()\fP are used for +compiling and matching regular expressions in a Perl-compatible manner. A +sample program that demonstrates the simplest way of using them is provided in +the file called \fIpcre2demo.c\fP in the PCRE2 source distribution. 
A listing +of this program is given in the +.\" HREF +\fBpcre2demo\fP +.\" +documentation, and the +.\" HREF +\fBpcre2sample\fP +.\" +documentation describes how to compile and run it. +.P +The compiling and matching functions recognize various options that are passed +as bits in an options argument. There are also some more complicated parameters +such as custom memory management functions and resource limits that are passed +in "contexts" (which are just memory blocks, described below). Simple +applications do not need to make use of contexts. +.P +Just-in-time (JIT) compiler support is an optional feature of PCRE2 that can be +built in appropriate hardware environments. It greatly speeds up the matching +performance of many patterns. Programs can request that it be used if +available by calling \fBpcre2_jit_compile()\fP after a pattern has been +successfully compiled by \fBpcre2_compile()\fP. This does nothing if JIT +support is not available. +.P +More complicated programs might need to make use of the specialist functions +\fBpcre2_jit_stack_create()\fP, \fBpcre2_jit_stack_free()\fP, and +\fBpcre2_jit_stack_assign()\fP in order to control the JIT code's memory usage. +.P +JIT matching is automatically used by \fBpcre2_match()\fP if it is available, +unless the PCRE2_NO_JIT option is set. There is also a direct interface for JIT +matching, which gives improved performance at the expense of less sanity +checking. The JIT-specific functions are discussed in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. +.P +A second matching function, \fBpcre2_dfa_match()\fP, which is not +Perl-compatible, is also provided. This uses a different algorithm for the +matching. The alternative algorithm finds all possible matches (at a given +point in the subject), and scans the subject just once (unless there are +lookaround assertions). However, this algorithm does not return captured +substrings. 
A description of the two matching algorithms and their advantages +and disadvantages is given in the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. There is no JIT support for \fBpcre2_dfa_match()\fP. +.P +In addition to the main compiling and matching functions, there are convenience +functions for extracting captured substrings from a subject string that has +been matched by \fBpcre2_match()\fP. They are: +.sp + \fBpcre2_substring_copy_byname()\fP + \fBpcre2_substring_copy_bynumber()\fP + \fBpcre2_substring_get_byname()\fP + \fBpcre2_substring_get_bynumber()\fP + \fBpcre2_substring_list_get()\fP + \fBpcre2_substring_length_byname()\fP + \fBpcre2_substring_length_bynumber()\fP + \fBpcre2_substring_nametable_scan()\fP + \fBpcre2_substring_number_from_name()\fP +.sp +\fBpcre2_substring_free()\fP and \fBpcre2_substring_list_free()\fP are also +provided, to free memory used for extracted strings. If either of these +functions is called with a NULL argument, the function returns immediately +without doing anything. +.P +The function \fBpcre2_substitute()\fP can be called to match a pattern and +return a copy of the subject string with substitutions for parts that were +matched. +.P +Functions whose names begin with \fBpcre2_serialize_\fP are used for saving +compiled patterns on disc or elsewhere, and reloading them later. +.P +Finally, there are functions for finding out information about a compiled +pattern (\fBpcre2_pattern_info()\fP) and about the configuration with which +PCRE2 was built (\fBpcre2_config()\fP). +.P +Functions with names ending with \fB_free()\fP are used for freeing memory +blocks of various sorts. In all cases, if one of these functions is called with +a NULL argument, it does nothing. +. +. +.SH "STRING LENGTHS AND OFFSETS" +.rs +.sp +The PCRE2 API uses string lengths and offsets into strings of code units in +several places. 
These values are always of type PCRE2_SIZE, which is an +unsigned integer type, currently always defined as \fIsize_t\fP. The largest +value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved +as a special indicator for zero-terminated strings and unset offsets. +Therefore, the longest string that can be handled is one less than this +maximum. Note that string lengths are always given in code units. Only in the +8-bit library is such a length the same as the number of bytes in the string. +. +. +.\" HTML +.SH NEWLINES +.rs +.sp +PCRE2 supports five different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, or any +Unicode newline sequence. The Unicode newline sequences are the three just +mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +.P +Each of the first three conventions is used by at least one operating system as +its standard newline sequence. When PCRE2 is built, a default can be specified. +If it is not, the default is set to LF, which is the Unix standard. However, +the newline convention can be changed by an application when calling +\fBpcre2_compile()\fP, or it can be specified by special text at the start of +the pattern itself; this overrides any other settings. See the +.\" HREF +\fBpcre2pattern\fP +.\" +page for details of the special character sequences. +.P +In the PCRE2 documentation the word "newline" is used to mean "the character or +pair of characters that indicate a line break". The choice of newline +convention affects the handling of the dot, circumflex, and dollar +metacharacters, the handling of #-comments in /x mode, and, when CRLF is a +recognized line ending sequence, the match position advancement for a +non-anchored pattern. 
There is more detail about this in the +.\" HTML +.\" +section on \fBpcre2_match()\fP options +.\" +below. +.P +The choice of newline convention does not affect the interpretation of +the \en or \er escape sequences, nor does it affect what \eR matches; this has +its own separate convention. +. +. +.SH MULTITHREADING +.rs +.sp +In a multithreaded application it is important to keep thread-specific data +separate from data that can be shared between threads. The PCRE2 library code +itself is thread-safe: it contains no static or global variables. The API is +designed to be fairly simple for non-threaded applications while at the same +time ensuring that multithreaded applications can use it. +.P +There are several different blocks of data that are used to pass information +between the application and the PCRE2 libraries. +. +. +.SS "The compiled pattern" +.rs +.sp +A pointer to the compiled form of a pattern is returned to the user when +\fBpcre2_compile()\fP is successful. The data in the compiled pattern is fixed, +and does not change when the pattern is matched. Therefore, it is thread-safe, +that is, the same compiled pattern can be used by more than one thread +simultaneously. For example, an application can compile all its patterns at the +start, before forking off multiple threads that use them. However, if the +just-in-time (JIT) optimization feature is being used, it needs separate memory +stack areas for each thread. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details. +.P +In a more complicated situation, where patterns are compiled only when they are +first needed, but are still shared between threads, pointers to compiled +patterns must be protected from simultaneous writing by multiple threads. This +is somewhat tricky to do correctly. 
If you know that writing to a pointer is +atomic in your environment, you can use logic like this: +.sp + Get a read-only (shared) lock (mutex) for pointer + if (pointer == NULL) + { + Get a write (unique) lock for pointer + if (pointer == NULL) pointer = pcre2_compile(... + } + Release the lock + Use pointer in pcre2_match() +.sp +Of course, testing for compilation errors should also be included in the code. +.P +The reason for checking the pointer a second time is as follows: Several +threads may have acquired the shared lock and tested the pointer for being +NULL, but only one of them will be given the write lock, with the rest kept +waiting. The winning thread will compile the pattern and store the result. +After this thread releases the write lock, another thread will get it, and if +it does not retest pointer for being NULL, will recompile the pattern and +overwrite the pointer, creating a memory leak and possibly causing other +issues. +.P +In an environment where writing to a pointer may not be atomic, the above logic +is not sufficient. The thread that is doing the compiling may be descheduled +after writing only part of the pointer, which could cause other threads to use +an invalid value. Instead of checking the pointer itself, a separate "pointer +is valid" flag (that can be updated atomically) must be used: +.sp + Get a read-only (shared) lock (mutex) for pointer + if (!pointer_is_valid) + { + Get a write (unique) lock for pointer + if (!pointer_is_valid) + { + pointer = pcre2_compile(... + pointer_is_valid = TRUE + } + } + Release the lock + Use pointer in pcre2_match() +.sp +If JIT is being used, but the JIT compilation is not being done immediately +(perhaps waiting to see if the pattern is used often enough), similar logic is +required. JIT compilation updates a value within the compiled code block, so a +thread must gain unique write access to the pointer before calling +\fBpcre2_jit_compile()\fP. 
Alternatively, \fBpcre2_code_copy()\fP or +\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the +compiled code before calling the JIT compiler. +. +. +.SS "Context blocks" +.rs +.sp +The next main section below introduces the idea of "contexts" in which PCRE2 +functions are called. A context is nothing more than a collection of parameters +that control the way PCRE2 operates. Grouping a number of parameters together +in a context is a convenient way of passing them to a PCRE2 function without +using lots of arguments. The parameters that are stored in contexts are in some +sense "advanced features" of the API. Many straightforward applications will +not need to use contexts. +.P +In a multithreaded application, if the parameters in a context are values that +are never changed, the same context can be used by all the threads. However, if +any thread needs to change any value in a context, it must make its own +thread-specific copy. +. +. +.SS "Match blocks" +.rs +.sp +The matching functions need a block of memory for storing the results of a +match. This includes details of what was matched, as well as additional +information such as the name of a (*MARK) setting. Each thread must provide its +own copy of this memory. +. +. +.SH "PCRE2 CONTEXTS" +.rs +.sp +Some PCRE2 functions have a lot of parameters, many of which are used only by +specialist applications, for example, those that use custom memory management +or non-standard character tables. To keep function argument lists at a +reasonable size, and at the same time to keep the API extensible, "uncommon" +parameters are passed to certain functions in a \fBcontext\fP instead of +directly. A context is just a block of memory that holds the parameter values. +Applications that do not need to adjust any of the context parameters can pass +NULL when a context pointer is required. 
+.P +There are three different types of context: a general context that is relevant +for several PCRE2 operations, a compile-time context, and a match-time context. +. +. +.SS "The general context" +.rs +.sp +At present, this context just contains pointers to (and data for) external +memory management functions that are called from several places in the PCRE2 +library. The context is named `general' rather than specifically `memory' +because in future other fields may be added. If you do not want to supply your +own custom memory management functions, you do not need to bother with a +general context. A general context is created by: +.sp +.nf +.B pcre2_general_context *pcre2_general_context_create( +.B " void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *)," +.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" +.fi +.sp +The two function pointers specify custom memory management functions, whose +prototypes are: +.sp + \fBvoid *private_malloc(PCRE2_SIZE, void *);\fP + \fBvoid private_free(void *, void *);\fP +.sp +Whenever code in PCRE2 calls these functions, the final argument is the value +of \fImemory_data\fP. Either of the first two arguments of the creation +function may be NULL, in which case the system memory management functions +\fImalloc()\fP and \fIfree()\fP are used. (This is not currently useful, as +there are no other fields in a general context, but in future there might be.) +The \fIprivate_malloc()\fP function is used (if supplied) to obtain memory for +storing the context, and all three values are saved as part of the context. +.P +Whenever PCRE2 creates a data block of any kind, the block contains a pointer +to the \fIfree()\fP function that matches the \fImalloc()\fP function that was +used. When the time comes to free the block, this function is called. 
+.P +A general context can be copied by calling: +.sp +.nf +.B pcre2_general_context *pcre2_general_context_copy( +.B " pcre2_general_context *\fIgcontext\fP);" +.fi +.sp +The memory used for a general context should be freed by calling: +.sp +.nf +.B void pcre2_general_context_free(pcre2_general_context *\fIgcontext\fP); +.fi +.sp +If this function is passed a NULL argument, it returns immediately without +doing anything. +. +. +.\" HTML +.SS "The compile context" +.rs +.sp +A compile context is required if you want to provide an external function for +stack checking during compilation or to change the default values of any of the +following compile-time parameters: +.sp + What \eR matches (Unicode newlines or CR, LF, CRLF only) + PCRE2's character tables + The newline character sequence + The compile time nested parentheses limit + The maximum length of the pattern string + The extra options bits (none set by default) + Which performance optimizations the compiler should apply +.sp +A compile context is also required if you are using custom memory management. +If none of these apply, just pass NULL as the context argument of +\fIpcre2_compile()\fP. +.P +A compile context is created, copied, and freed by the following functions: +.sp +.nf +.B pcre2_compile_context *pcre2_compile_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_compile_context *pcre2_compile_context_copy( +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); +.fi +.sp +A compile context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. 
+.sp +.nf +.B int pcre2_set_bsr(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF, +or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line +ending sequence. The value is used by the JIT compiler and by the two +interpreted matching functions, \fIpcre2_match()\fP and +\fIpcre2_dfa_match()\fP. +.sp +.nf +.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +.sp +The value must be the result of a call to \fBpcre2_maketables()\fP, whose only +argument is a general context. This function builds a set of character tables +in the current locale. +.sp +.nf +.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIextra_options\fP);" +.fi +.sp +As PCRE2 has developed, almost all the 32 option bits that are available in +the \fIoptions\fP argument of \fBpcre2_compile()\fP have been used up. To avoid +running out, the compile context contains a set of extra option bits which are +used for some newer, assumed rarer, options. This function sets those bits. It +always sets all the bits (either on or off); that is, it replaces any existing +setting rather than modifying it. The available options are defined in the +section entitled "Extra compile options" +.\" HTML +.\" +below. +.\" +.sp +.nf +.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +This sets a maximum length, in code units, for any pattern string that is +compiled with this context. If the pattern is longer, an error is generated. +This facility is provided so that applications that accept patterns from +external sources can limit their size. The default is the largest number that a +PCRE2_SIZE variable can hold, which is effectively unlimited.
+.sp +.nf +.B int pcre2_set_max_pattern_compiled_length( +.B " pcre2_compile_context *\fIccontext\fP, PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +This sets a maximum size, in bytes, for the memory needed to hold the compiled +version of a pattern that is compiled with this context. If the pattern needs +more memory, an error is generated. This facility is provided so that +applications that accept patterns from external sources can limit the amount of +memory they use. The default is the largest number that a PCRE2_SIZE variable +can hold, which is effectively unlimited. +.sp +.nf +.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This sets a maximum length for the number of characters matched by a +variable-length lookbehind assertion. The default is set when PCRE2 is built, +with the ultimate default being 255, the same as Perl. Lookbehind assertions +without a bounding length are not supported. +.sp +.nf +.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This specifies which characters or character sequences are to be recognized as +newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), +PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character +sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), +PCRE2_NEWLINE_ANY (any Unicode newline sequence), or PCRE2_NEWLINE_NUL (the +NUL character, that is a binary zero). +.P +A pattern can override the value set in the compile context by starting with a +sequence such as (*CRLF). See the +.\" HREF +\fBpcre2pattern\fP +.\" +page for details. +.P +When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE +option, the newline convention affects the recognition of the end of internal +comments starting with #.
The value is saved with the compiled pattern for +subsequent use by the JIT compiler and by the two interpreted matching +functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP. +.sp +.nf +.B int pcre2_set_parens_nest_limit(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This parameter adjusts the limit, set when PCRE2 is built (default 250), on the +depth of parenthesis nesting in a pattern. This limit stops rogue patterns +using up too much system stack when being compiled. The limit applies to +parentheses of all kinds, not just capturing parentheses. +.sp +.nf +.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP, +.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);" +.fi +.sp +There is at least one application that runs PCRE2 in threads with very limited +system stack, where running out of stack is to be avoided at all costs. The +parenthesis limit above cannot take account of how much stack is actually +available during compilation. For a finer control, you can supply a function +that is called whenever \fBpcre2_compile()\fP starts to compile a parenthesized +part of a pattern. This function can check the actual stack size (or anything +else that it wants to, of course). +.P +The first argument to the callout function gives the current depth of +nesting, and the second is user data that is set up by the last argument of +\fBpcre2_set_compile_recursion_guard()\fP. The callout function should return +zero if all is well, or non-zero to force an error. +.sp +.nf +.B int pcre2_set_optimize(pcre2_compile_context *\fIccontext\fP, +.B " uint32_t \fIdirective\fP);" +.fi +.sp +PCRE2 can apply various performance optimizations during compilation, in order +to make matching faster. For example, the compiler might convert some regex +constructs into an equivalent construct which \fBpcre2_match()\fP can execute +faster. By default, all available optimizations are enabled. 
However, in rare +cases, one might wish to disable specific optimizations. For example, if it is +known that some optimizations cannot benefit a certain regex, it might be +desirable to disable them, in order to speed up compilation. +.P +The permitted values of \fIdirective\fP are as follows: +.sp + PCRE2_OPTIMIZATION_FULL +.sp +Enable all optional performance optimizations. This is the default value. +.sp + PCRE2_OPTIMIZATION_NONE +.sp +Disable all optional performance optimizations. +.sp + PCRE2_AUTO_POSSESS + PCRE2_AUTO_POSSESS_OFF +.sp +Enable/disable "auto-possessification" of variable quantifiers such as * and +. +This optimization, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +disable this optimization if you want the matching functions to do a full, +unoptimized search and run all the callouts. +.sp + PCRE2_DOTSTAR_ANCHOR + PCRE2_DOTSTAR_ANCHOR_OFF +.sp +Enable/disable an optimization that is applied when .* is the first significant +item in a top-level branch of a pattern, and all the other branches also start +with .* or with \eA or \eG or ^. Such a pattern is automatically anchored if +PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set for any +^ items. Otherwise, the fact that any match must start either at the start of +the subject or following a newline is remembered. Like other optimizations, +this can cause callouts to be skipped. +.P +Dotstar anchor optimization is automatically disabled for .* if it is inside an +atomic group or a capture group that is the subject of a backreference, or if +the pattern contains (*PRUNE) or (*SKIP). +.sp + PCRE2_START_OPTIMIZE + PCRE2_START_OPTIMIZE_OFF +.sp +Enable/disable optimizations which cause matching functions to scan the subject +string for specific code unit values before attempting a match. 
For example, if +it is known that an unanchored match must start with a specific value, the +matching code searches the subject for that value, and fails immediately if it +cannot find it, without actually running the main matching function. This means +that a special item such as (*COMMIT) at the start of a pattern is not +considered until after a suitable starting point for the match has been found. +Also, when callouts or (*MARK) items are in use, these "start-up" optimizations +can cause them to be skipped if the pattern is never actually used. The start-up +optimizations are in effect a pre-scan of the subject that takes place before +the pattern is run. +.P +Disabling start-up optimizations ensures that in cases where the result is "no +match", the callouts do occur, and that items such as (*COMMIT) and (*MARK) are +considered at every possible starting position in the subject string. +.P +Disabling start-up optimizations may change the outcome of a matching operation. +Consider the pattern +.sp + (*COMMIT)ABC +.sp +When this is compiled, PCRE2 records the fact that a match must start with the +character "A". Suppose the subject string is "DEFABC". The start-up +optimization scans along the subject, finds "A" and runs the first match +attempt from there. The (*COMMIT) item means that the pattern must match the +current starting position, which in this case, it does. However, if the same +match is run without start-up optimizations, the initial scan along the subject +string does not happen. The first match attempt is run starting from "D" and +when this fails, (*COMMIT) prevents any further matches being tried, so the +overall result is "no match". +.P +Another start-up optimization makes use of a minimum length for a matching +subject, which is recorded when possible. Consider the pattern +.sp + (*MARK:1)B(*MARK:2)(X|Y) +.sp +The minimum length for a match is two characters. 
If the subject is "XXBB", the +"starting character" optimization skips "XX", then tries to match "BB", which +is long enough. In the process, (*MARK:2) is encountered and remembered. When +the match attempt fails, the next "B" is found, but there is only one character +left, so there are no more attempts, and "no match" is returned with the "last +mark seen" set to "2". Without start-up optimizations, however, matches are +tried at every possible starting position, including at the end of the subject, +where (*MARK:1) is encountered, but there is no "B", so the "last mark seen" +that is returned is "1". In this case, the optimizations do not affect the +overall match result, which is still "no match", but they do affect the +auxiliary information that is returned. +. +. +.\" HTML +.SS "The match context" +.rs +.sp +A match context is required if you want to: +.sp + Set up a callout function + Set an offset limit for matching an unanchored pattern + Change the limit on the amount of heap used when matching + Change the backtracking match limit + Change the backtracking depth limit + Set custom memory management specifically for the match +.sp +If none of these apply, just pass NULL as the context argument of +\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP. +.P +A match context is created, copied, and freed by the following functions: +.sp +.nf +.B pcre2_match_context *pcre2_match_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_context *pcre2_match_context_copy( +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); +.fi +.sp +A match context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or +PCRE2_ERROR_BADDATA if invalid data is detected. 
+.sp +.nf +.B int pcre2_set_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +This sets up a callout function for PCRE2 to call at specified points +during a matching operation. Details are given in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +.sp +.nf +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +This sets up a callout function for PCRE2 to call after each substitution +made by \fBpcre2_substitute()\fP. Details are given in the section entitled +"Creating a new string with substitutions" +.\" HTML +.\" +below. +.\" +.sp +.nf +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +This sets up a callout function for PCRE2 to call when performing case +transformations inside \fBpcre2_substitute()\fP. Details are given in the +section entitled "Creating a new string with substitutions" +.\" HTML +.\" +below. +.\" +.sp +.nf +.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE \fIvalue\fP);" +.fi +.sp +The \fIoffset_limit\fP parameter limits how far an unanchored search can +advance in the subject string. The default value is PCRE2_UNSET. The +\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP functions return +PCRE2_ERROR_NOMATCH if a match with a starting point before or at the given +offset is not found. The \fBpcre2_substitute()\fP function makes no more +substitutions. +.P +For example, if the pattern /abc/ is matched against "123abc" with an offset +limit less than 3, the result is PCRE2_ERROR_NOMATCH. 
A match can never be +found if the \fIstartoffset\fP argument of \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP, or \fBpcre2_substitute()\fP is greater than the offset +limit set in the match context. +.P +When using this facility, you must set the PCRE2_USE_OFFSET_LIMIT option when +calling \fBpcre2_compile()\fP so that when JIT is in use, different code can be +compiled. If a match is started with a non-default offset limit when +PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. +.P +The offset limit facility can be used to track progress when searching large +subject strings or to limit the extent of global substitutions. See also the +PCRE2_FIRSTLINE option, which requires a match to start before or at the first +newline that follows the start of matching in the subject. If this is set with +an offset limit, a match must occur in the first line and also within the +offset limit. In other words, whichever limit comes first is used. +.sp +.nf +.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +The \fIheap_limit\fP parameter specifies, in units of kibibytes (1024 bytes), +the maximum amount of heap memory that \fBpcre2_match()\fP may use to hold +backtracking information when running an interpretive match. This limit also +applies to \fBpcre2_dfa_match()\fP, which may use the heap when processing +patterns with a lot of nested pattern recursion or lookarounds or atomic +groups. This limit does not apply to matching with the JIT optimization, which +has its own memory control arrangements (see the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details). If the limit is reached, the negative error +code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2 +is built; if it is not, the default is set very large and is essentially +unlimited.
+.P +A value for the heap limit may also be supplied by an item at the start of a +pattern of the form +.sp + (*LIMIT_HEAP=ddd) +.sp +where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of \fBpcre2_match()\fP or, if no such +limit is set, less than the default. +.P +The \fBpcre2_match()\fP function always needs some heap memory, so setting a +value of zero guarantees a "heap limit exceeded" error. Details of how +\fBpcre2_match()\fP uses the heap are given in the +.\" HREF +\fBpcre2perform\fP +.\" +documentation. +.P +For \fBpcre2_dfa_match()\fP, a vector on the system stack is used when +processing pattern recursions, lookarounds, or atomic groups, and only if this +is not big enough is heap memory used. In this case, setting a value of zero +disables the use of the heap. +.sp +.nf +.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using +up too many computing resources when processing patterns that are not going to +match, but which have a very large number of possibilities in their search +trees. The classic example is a pattern that uses nested unlimited repeats. +.P +There is an internal counter in \fBpcre2_match()\fP that is incremented each +time round its main matching loop. If this value reaches the match limit, +\fBpcre2_match()\fP returns the negative value PCRE2_ERROR_MATCHLIMIT. This has +the effect of limiting the amount of backtracking that can take place. For +patterns that are not anchored, the count restarts from zero for each position +in the subject string. This limit also applies to \fBpcre2_dfa_match()\fP, +though the counting is done in a different way. +.P +When \fBpcre2_match()\fP is called with a pattern that was successfully +processed by \fBpcre2_jit_compile()\fP, the way in which matching is executed +is entirely different. 
However, there is still the possibility of runaway +matching that goes on for a very long time, and so the \fImatch_limit\fP value +is also used in this case (but in a different way) to limit how long the +matching can continue. +.P +The default value for the limit can be set when PCRE2 is built; the default is +10 million, which handles all but the most extreme cases. A value for the match +limit may also be supplied by an item at the start of a pattern of the form +.sp + (*LIMIT_MATCH=ddd) +.sp +where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP or, if no such limit is set, less than the default. +.sp +.nf +.B int pcre2_set_depth_limit(pcre2_match_context *\fImcontext\fP, +.B " uint32_t \fIvalue\fP);" +.fi +.sp +This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP. +Each time a nested backtracking point is passed, a new memory frame is used +to remember the state of matching at that point. Thus, this parameter +indirectly limits the amount of memory that is used in a match. However, +because the size of each memory frame depends on the number of capturing +parentheses, the actual memory limit varies from pattern to pattern. This limit +was more useful in versions before 10.30, where function recursion was used for +backtracking. +.P +The depth limit is not relevant, and is ignored, when matching is done using +JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which +uses it to limit the depth of nested internal recursive function calls that +implement atomic groups, lookaround assertions, and pattern recursions. This +limits, indirectly, the amount of system stack that is used. It was more useful +in versions before 10.32, when stack memory was used for local workspace +vectors for recursive function calls. 
From version 10.32, only local variables +are allocated on the stack and as each call uses only a few hundred bytes, even +a small stack can support quite a lot of recursion. +.P +If the depth of internal recursive function calls is great enough, local +workspace vectors are allocated on the heap from version 10.32 onwards, so the +depth limit also indirectly limits the amount of heap memory that is used. A +recursive pattern such as /(.(?2))((?1)|)/, when matched to a very long string +using \fBpcre2_dfa_match()\fP, can use a great deal of memory. However, it is +probably better to limit heap usage directly by calling +\fBpcre2_set_heap_limit()\fP. +.P +The default value for the depth limit can be set when PCRE2 is built; if it is +not, the default is set to the same value as the default for the match limit. +If the limit is exceeded, \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP +returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth limit may also be +supplied by an item at the start of a pattern of the form +.sp + (*LIMIT_DEPTH=ddd) +.sp +where ddd is a decimal number. However, such a setting is ignored unless ddd is +less than the limit set by the caller of \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP or, if no such limit is set, less than the default. +. +. +.SH "CHECKING BUILD-TIME OPTIONS" +.rs +.sp +.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP); +.P +The function \fBpcre2_config()\fP makes it possible for a PCRE2 client to find +the value of certain configuration parameters and to discover which optional +features have been compiled into the PCRE2 library. The +.\" HREF +\fBpcre2build\fP +.\" +documentation has more details about these features. +.P +The first argument for \fBpcre2_config()\fP specifies which information is +required. The second argument is a pointer to memory into which the information +is placed. If NULL is passed, the function returns the amount of memory that is +needed for the requested information. 
For calls that return numerical values, +the value is in bytes; when requesting these values, \fIwhere\fP should point +to appropriately aligned memory. For calls that return strings, the required +length is given in code units, not counting the terminating zero. +.P +When requesting information, the returned value from \fBpcre2_config()\fP is +non-negative on success, or the negative error code PCRE2_ERROR_BADOPTION if +the value in the first argument is not recognized. The following information is +available: +.sp + PCRE2_CONFIG_BSR +.sp +The output is a uint32_t integer whose value indicates what character +sequences the \eR escape sequence matches by default. A value of +PCRE2_BSR_UNICODE means that \eR matches any Unicode line ending sequence; a +value of PCRE2_BSR_ANYCRLF means that \eR matches only CR, LF, or CRLF. The +default can be overridden when a pattern is compiled. +.sp + PCRE2_CONFIG_COMPILED_WIDTHS +.sp +The output is a uint32_t integer whose lower bits indicate which code unit +widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support, +and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively. +.sp + PCRE2_CONFIG_DEPTHLIMIT +.sp +The output is a uint32_t integer that gives the default limit for the depth of +nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions, +lookarounds, and atomic groups in \fBpcre2_dfa_match()\fP. Further details are +given with \fBpcre2_set_depth_limit()\fP above. +.sp + PCRE2_CONFIG_HEAPLIMIT +.sp +The output is a uint32_t integer that gives, in kibibytes, the default limit +for the amount of heap memory used by \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP. Further details are given with +\fBpcre2_set_heap_limit()\fP above. +.sp + PCRE2_CONFIG_JIT +.sp +The output is a uint32_t integer that is set to one if support for just-in-time +compiling is included in the library; otherwise it is set to zero. 
Note that +having the support in the library does not guarantee that JIT will be used for +any given match, and neither does it guarantee that JIT will actually be able +to function, because it may not be able to allocate executable memory in some +environments. There is a special call to \fBpcre2_jit_compile()\fP that can be +used to check this. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details. +.sp + PCRE2_CONFIG_JITTARGET +.sp +The \fIwhere\fP argument should point to a buffer that is at least 48 code +units long. (The exact length required can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a +string that contains the name of the architecture for which the JIT compiler is +configured, for example "x86 32bit (little endian + unaligned)". If JIT support +is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the number of +code units used is returned. This is the length of the string, plus one unit +for the terminating zero. +.sp + PCRE2_CONFIG_LINKSIZE +.sp +The output is a uint32_t integer that contains the number of bytes used for +internal linkage in compiled regular expressions. When PCRE2 is configured, the +value can be set to 2, 3, or 4, with the default being 2. This is the value +that is returned by \fBpcre2_config()\fP. However, when the 16-bit library is +compiled, a value of 3 is rounded up to 4, and when the 32-bit library is +compiled, internal linkages always use 4 bytes, so the configured value is not +relevant. +.P +The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all +but the most massive patterns, since it allows the size of the compiled pattern +to be up to 65535 code units. Larger values allow larger regular expressions to +be compiled by those two libraries, but at the expense of slower matching. +.sp + PCRE2_CONFIG_MATCHLIMIT +.sp +The output is a uint32_t integer that gives the default match limit for +\fBpcre2_match()\fP. 
Further details are given with +\fBpcre2_set_match_limit()\fP above. +.sp + PCRE2_CONFIG_NEWLINE +.sp +The output is a uint32_t integer whose value specifies the default character +sequence that is recognized as meaning "newline". The values are: +.sp + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) +.sp +The default should normally correspond to the standard sequence for your +operating system. +.sp + PCRE2_CONFIG_NEVER_BACKSLASH_C +.sp +The output is a uint32_t integer that is set to one if the use of \eC was +permanently disabled when PCRE2 was built; otherwise it is set to zero. +.sp + PCRE2_CONFIG_PARENSLIMIT +.sp +The output is a uint32_t integer that gives the maximum depth of nesting +of parentheses (of any kind) in a pattern. This limit is imposed to cap the +amount of system stack used when a pattern is compiled. It is specified when +PCRE2 is built; the default is 250. This limit does not take into account the +stack that may already be used by the calling application. For finer control +over compilation stack usage, see \fBpcre2_set_compile_recursion_guard()\fP. +.sp + PCRE2_CONFIG_STACKRECURSE +.sp +This parameter is obsolete and should not be used in new code. The output is a +uint32_t integer that is always set to zero. +.sp + PCRE2_CONFIG_TABLES_LENGTH +.sp +The output is a uint32_t integer that gives the length of PCRE2's character +processing tables in bytes. For details of these tables see the +.\" HTML +.\" +section on locale support +.\" +below. +.sp + PCRE2_CONFIG_UNICODE_VERSION +.sp +The \fIwhere\fP argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) 
If PCRE2 has been compiled +without Unicode support, the buffer is filled with the text "Unicode not +supported". Otherwise, the Unicode version string (for example, "8.0.0") is +inserted. The number of code units used is returned. This is the length of the +string plus one unit for the terminating zero. +.sp + PCRE2_CONFIG_UNICODE +.sp +The output is a uint32_t integer that is set to one if Unicode support is +available; otherwise it is set to zero. Unicode support implies UTF support. +.sp + PCRE2_CONFIG_VERSION +.sp +The \fIwhere\fP argument should point to a buffer that is at least 24 code +units long. (The exact length required can be found by calling +\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with +the PCRE2 version string, zero-terminated. The number of code units used is +returned. This is the length of the string plus one unit for the terminating +zero. +. +. +.\" HTML +.SH "COMPILING A PATTERN" +.rs +.sp +.nf +.B pcre2_code *pcre2_compile(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, int *\fIerrorcode\fP, PCRE2_SIZE *\fIerroroffset,\fP" +.B " pcre2_compile_context *\fIccontext\fP);" +.sp +.B void pcre2_code_free(pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP); +.sp +.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP); +.fi +.P +The \fBpcre2_compile()\fP function compiles a pattern into an internal form. +The pattern is defined by a pointer to a string of code units and a length in +code units. If the pattern is zero-terminated, the length can be specified as +PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a length of zero is treated +as an empty string (NULL with a non-zero length causes an error return). The +function returns a pointer to a block of memory that contains the compiled +pattern and related data, or NULL if an error occurred. 
+.P +If the compile context argument \fIccontext\fP is NULL, memory for the compiled +pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from +the same memory function that was used for the compile context. The caller must +free the memory by calling \fBpcre2_code_free()\fP when it is no longer needed. +If \fBpcre2_code_free()\fP is called with a NULL argument, it returns +immediately, without doing anything. +.P +The function \fBpcre2_code_copy()\fP makes a copy of the compiled code in new +memory, using the same memory allocator as was used for the original. However, +if the code has been processed by the JIT compiler (see +.\" HTML +.\" +below), +.\" +the JIT information cannot be copied (because it is position-dependent). +The new copy can initially be used only for non-JIT matching, though it can be +passed to \fBpcre2_jit_compile()\fP if required. If \fBpcre2_code_copy()\fP is +called with a NULL argument, it returns NULL. +.P +The \fBpcre2_code_copy()\fP function provides a way for individual threads in a +multithreaded application to acquire a private copy of shared compiled code. +However, it does not make a copy of the character tables used by the compiled +pattern; the new pattern code points to the same tables as the original code. +(See +.\" HTML +.\" +"Locale Support" +.\" +below for details of these character tables.) In many applications the same +tables are used throughout, so this behaviour is appropriate. Nevertheless, +there are occasions when a copy of a compiled pattern and the relevant tables +are needed. The \fBpcre2_code_copy_with_tables()\fP provides this facility. +Copies of both the code and the tables are made, with the new code pointing to +the new tables. The memory for the new tables is automatically freed when +\fBpcre2_code_free()\fP is called for the new copy of the compiled code. If +\fBpcre2_code_copy_with_tables()\fP is called with a NULL argument, it returns +NULL. 
+.P +NOTE: When one of the matching functions is called, pointers to the compiled +pattern and the subject string are set in the match data block so that they can +be referenced by the substring extraction functions after a successful match. +After running a match, you must not free a compiled pattern or a subject string +until after all operations on the +.\" HTML +.\" +match data block +.\" +have taken place, unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for \fBpcre2_match()\fP" +.\" HTML +.\" +below. +.\" +.P +The \fIoptions\fP argument for \fBpcre2_compile()\fP contains various bit +settings that affect the compilation. It should be zero if none of them are +required. The available options are described below. Some of them (in +particular, those that are compatible with Perl, but some others as well) can +also be set and unset from within the pattern (see the detailed description in +the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation). +.P +For those options that can be different in different parts of the pattern, the +contents of the \fIoptions\fP argument specifies their settings at the start of +compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and PCRE2_NO_UTF_CHECK +options can be set at the time of matching as well as at compile time. +.P +Some additional options and less frequently required compile-time parameters +(for example, the newline setting) can be provided in a compile context (as +described +.\" HTML +.\" +above). +.\" +.P +If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns +NULL immediately. Otherwise, the variables to which these point are set to an +error code and an offset (number of code units) within the pattern, +respectively, when \fBpcre2_compile()\fP returns NULL because a compilation +error has occurred. 
+.P +There are over 100 positive error codes that \fBpcre2_compile()\fP may return +if it finds an error in the pattern. There are also some negative error codes +that are used for invalid UTF strings when validity checking is in force. These +are the same as given by \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and +are described in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. There is no separate documentation for the positive error codes, +because the textual error messages that are obtained by calling the +\fBpcre2_get_error_message()\fP function (see "Obtaining a textual error +message" +.\" HTML +.\" +below) +.\" +should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined +for both positive and negative error codes in \fBpcre2.h\fP. When compilation +is successful \fIerrorcode\fP is set to a value that returns the message "no +error" if passed to \fBpcre2_get_error_message()\fP. +.P +The value returned in \fIerroroffset\fP is an indication of where in the +pattern an error occurred. When there is no error, zero is returned. A non-zero +value is not necessarily the furthest point in the pattern that was read. For +example, after the error "lookbehind assertion is not fixed length", the error +offset points to the start of the failing assertion. For an invalid UTF-8 or +UTF-16 string, the offset is that of the first code unit of the failing +character. +.P +Some errors are not detected until the whole pattern has been scanned; in these +cases, the offset passed back is the length of the pattern. Note that the +offset is in code units, not characters, even in a UTF mode. It may sometimes +point into the middle of a UTF-8 or UTF-16 character. 
+.P +This code fragment shows a typical straightforward call to +\fBpcre2_compile()\fP: +.sp + pcre2_code *re; + PCRE2_SIZE erroffset; + int errorcode; + re = pcre2_compile( + "^A.*Z", /* the pattern */ + PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ + 0, /* default options */ + &errorcode, /* for error code */ + &erroffset, /* for error offset */ + NULL); /* no compile context */ +.sp +. +. +.SS "Main compile options" +.rs +.sp +The following names for option bits are defined in the \fBpcre2.h\fP header +file: +.sp + PCRE2_ANCHORED +.sp +If this bit is set, the pattern is forced to be "anchored", that is, it is +constrained to match only at the first matching point in the string that is +being searched (the "subject string"). This effect can also be achieved by +appropriate constructs in the pattern itself, which is the only way to do it in +Perl. +.sp + PCRE2_ALLOW_EMPTY_CLASS +.sp +By default, for compatibility with Perl, a closing square bracket that +immediately follows an opening one is treated as a data character for the +class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which +therefore contains no characters and so can never match. +.sp + PCRE2_ALT_BSUX +.sp +This option requests alternative handling of three escape sequences, which +makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: +.P +(1) \eU matches an upper case "U" character; by default \eU causes a compile +time error (Perl uses \eU to upper case subsequent characters). +.P +(2) \eu matches a lower case "u" character unless it is followed by four +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. By default, \eu causes a compile time error (Perl uses it to upper +case the following character). +.P +(3) \ex matches a lower case "x" character unless it is followed by two +hexadecimal digits, in which case the hexadecimal number defines the code point +to match. 
By default, as in Perl, a hexadecimal number is always expected after +\ex, but it may have zero, one, or two digits (so, for example, \exz matches a +binary zero character followed by z). +.P +ECMAscript 6 added additional functionality to \eu. This can be accessed using +the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options" +.\" HTML +.\" +below). +.\" +Note that this alternative escape handling applies only to patterns. Neither of +these options affects the processing of replacement strings passed to +\fBpcre2_substitute()\fP. +.sp + PCRE2_ALT_CIRCUMFLEX +.sp +In multiline mode (when PCRE2_MULTILINE is set), the circumflex metacharacter +matches at the start of the subject (unless PCRE2_NOTBOL is set), and also +after any internal newline. However, it does not match after a newline at the +end of the subject, for compatibility with Perl. If you want a multiline +circumflex also to match after a terminating newline, you must set +PCRE2_ALT_CIRCUMFLEX. +.sp + PCRE2_ALT_EXTENDED_CLASS +.sp +Alters the parsing of character classes to follow the extended syntax +described by Unicode UTS#18. The PCRE2_ALT_EXTENDED_CLASS option has no impact +on the behaviour of the Perl-specific "(?[...])" syntax for extended classes, +but instead enables the alternative syntax of extended class behaviour inside +ordinary "[...]" character classes. See the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation for details of the character classes supported. +.sp + PCRE2_ALT_VERBNAMES +.sp +By default, for compatibility with Perl, the name in any verb sequence such as +(*MARK:NAME) is any sequence of characters that does not include a closing +parenthesis. The name is not processed in any way, and it is not possible to +include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES +option is set, normal backslash processing is applied to verb names and only an +unescaped closing parenthesis terminates the name. 
A closing parenthesis can be +included in a name either as \e) or between \eQ and \eE. If the PCRE2_EXTENDED +or PCRE2_EXTENDED_MORE option is set with PCRE2_ALT_VERBNAMES, unescaped +whitespace in verb names is skipped and #-comments are recognized, exactly as +in the rest of the pattern. +.sp + PCRE2_AUTO_CALLOUT +.sp +If this bit is set, \fBpcre2_compile()\fP automatically inserts callout items, +all with number 255, before each pattern item, except immediately before or +after an explicit callout in the pattern. For discussion of the callout +facility, see the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +.sp + PCRE2_CASELESS +.sp +If this bit is set, letters in the pattern match both upper and lower case +letters in the subject. It is equivalent to Perl's /i option, and it can be +changed within a pattern by a (?i) option setting. If either PCRE2_UTF or +PCRE2_UCP is set, Unicode properties are used for all characters with more than +one other case, and for all characters whose code points are greater than +U+007F. +.P +Note that there are two ASCII characters, K and S, that, in addition to +their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin +sign) and U+017F (long S) respectively. If you do not want this case +equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT. +.P +One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +.P +For lower valued characters with only one other case, a lookup table is used +for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used +for all code points less than 256, and higher code points (available only in +16-bit or 32-bit mode) are treated as not having another case. 
+.P +From release 10.45 PCRE2_CASELESS also affects what some of the letter-related +Unicode property escapes (\ep and \eP) match. The properties Lu (upper case +letter), Ll (lower case letter), and Lt (title case letter) are all treated as +LC (cased letter) when PCRE2_CASELESS is set. +.sp + PCRE2_DOLLAR_ENDONLY +.sp +If this bit is set, a dollar metacharacter in the pattern matches only at the +end of the subject string. Without this option, a dollar also matches +immediately before a newline at the end of the string (but not before any other +newlines). The PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is +set. There is no equivalent to this option in Perl, and no way to set it within +a pattern. +.sp + PCRE2_DOTALL +.sp +If this bit is set, a dot metacharacter in the pattern matches any character, +including one that indicates a newline. However, it only ever matches one +character, even if newlines are coded as CRLF. Without this option, a dot does +not match when the current position in the subject is at a newline. This option +is equivalent to Perl's /s option, and it can be changed within a pattern by a +(?s) option setting. A negative class such as [^a] always matches newline +characters, and the \eN escape sequence always matches a non-newline character, +independent of the setting of PCRE2_DOTALL. +.sp + PCRE2_DUPNAMES +.sp +If this bit is set, names used to identify capture groups need not be unique. +This can be helpful for certain types of pattern when it is known that only one +instance of the named group can ever be matched. There are more details of +named capture groups below; see also the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.sp + PCRE2_ENDANCHORED +.sp +If this bit is set, the end of any pattern match must be right at the end of +the string being searched (the "subject string"). 
If the pattern match +succeeds by reaching (*ACCEPT), but does not reach the end of the subject, the +match fails at the current starting point. For unanchored patterns, a new match +is then tried at the next starting point. However, if the match succeeds by +reaching the end of the pattern, but not the end of the subject, backtracking +occurs and an alternative match may be found. Consider these two patterns: +.sp + .(*ACCEPT)|.. + .|.. +.sp +If matched against "abc" with PCRE2_ENDANCHORED set, the first matches "c" +whereas the second matches "bc". The effect of PCRE2_ENDANCHORED can also be +achieved by appropriate constructs in the pattern itself, which is the only way +to do it in Perl. +.P +For DFA matching with \fBpcre2_dfa_match()\fP, PCRE2_ENDANCHORED applies only +to the first (that is, the longest) matched string. Other parallel matches, +which are necessarily substrings of the first one, must obviously end before +the end of the subject. +.sp + PCRE2_EXTENDED +.sp +If this bit is set, most white space characters in the pattern are totally +ignored except when escaped, inside a character class, or inside a \eQ...\eE +sequence. However, white space is not allowed within sequences such as (?> that +introduce various parenthesized groups, nor within numerical quantifiers such +as {1,3}. Ignorable white space is permitted between an item and a following +quantifier and between a quantifier and a following + that indicates +possessiveness. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be +changed within a pattern by a (?x) option setting. +.P +When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as +white space only those characters with code points less than 256 that are +flagged as white space in its low-character table. The table is normally +created by +.\" HREF +\fBpcre2_maketables()\fP, +.\" +which uses the \fBisspace()\fP function to identify space characters. 
In most +ASCII environments, the relevant characters are those with code points 0x0009 +(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D +(carriage return), and 0x0020 (space). +.P +When PCRE2 is compiled with Unicode support, in addition to these characters, +five more Unicode "Pattern White Space" characters are recognized by +PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-right mark), +U+200F (right-to-left mark), U+2028 (line separator), and U+2029 (paragraph +separator). This set of characters is the same as recognized by Perl's /x +option. Note that the horizontal and vertical space characters that are matched +by the \eh and \ev escapes in patterns are a much bigger set. +.P +As well as ignoring most white space, PCRE2_EXTENDED also causes characters +between an unescaped # outside a character class and the next newline, +inclusive, to be ignored, which makes it possible to include comments inside +complicated patterns. Note that the end of this type of comment is a literal +newline sequence in the pattern; escape sequences that happen to represent a +newline do not count. +.P +Which characters are interpreted as newlines can be specified by a setting in +the compile context that is passed to \fBpcre2_compile()\fP or by a special +sequence at the start of the pattern, as described in the section entitled +.\" HTML +.\" +"Newline conventions" +.\" +in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is +built. +.sp + PCRE2_EXTENDED_MORE +.sp +This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space +and horizontal tab characters are ignored inside a character class. Note: only +these two characters are ignored, not the full set of pattern white space +characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is +equivalent to Perl's /xx option, and it can be changed within a pattern by a +(?xx) option setting. 
+.sp + PCRE2_FIRSTLINE +.sp +If this option is set, the start of an unanchored pattern match must be before +or at the first newline in the subject string following the start of matching, +though the matched text may continue over the newline. If \fIstartoffset\fP is +non-zero, the limiting newline is not necessarily the first newline in the +subject. For example, if the subject string is "abc\enxyz" (where \en +represents a single-character newline) a pattern match for "yz" succeeds with +PCRE2_FIRSTLINE if \fIstartoffset\fP is greater than 3. See also +PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If +PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first +line and also within the offset limit. In other words, whichever limit comes +first is used. This option has no effect for anchored patterns. +.sp + PCRE2_LITERAL +.sp +If this option is set, all meta-characters in the pattern are disabled, and it +is treated as a literal string. Matching literal strings with a regular +expression engine is not the most efficient way of doing it. If you are doing a +lot of literal matching and are worried about efficiency, you should consider +using other approaches. The only other main options that are allowed with +PCRE2_LITERAL are: PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, +PCRE2_CASELESS, PCRE2_FIRSTLINE, PCRE2_MATCH_INVALID_UTF, +PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK, PCRE2_UTF, and +PCRE2_USE_OFFSET_LIMIT. The extra options PCRE2_EXTRA_MATCH_LINE and +PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error. +.sp + PCRE2_MATCH_INVALID_UTF +.sp +This option forces PCRE2_UTF (see below) and also enables support for matching +by \fBpcre2_match()\fP in subject strings that contain invalid UTF sequences. +Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. 
They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. This facility is not supported for DFA matching. For details, +see the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.sp + PCRE2_MATCH_UNSET_BACKREF +.sp +If this option is set, a backreference to an unset capture group matches an +empty string (by default this causes the current matching alternative to fail). +A pattern such as (\e1)(a) succeeds when this option is set (assuming it can +find an "a" in the subject), whereas it fails by default, for Perl +compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka +JavaScript). +.sp + PCRE2_MULTILINE +.sp +By default, for the purposes of matching "start of line" and "end of line", +PCRE2 treats the subject string as consisting of a single line of characters, +even if it actually contains newlines. The "start of line" metacharacter (^) +matches only at the start of the string, and the "end of line" metacharacter +($) matches only at the end of the string, or before a terminating newline +(except when PCRE2_DOLLAR_ENDONLY is set). Note, however, that unless +PCRE2_DOTALL is set, the "any character" metacharacter (.) does not match at a +newline. This behaviour (for ^, $, and dot) is the same as Perl. +.P +When PCRE2_MULTILINE is set, the "start of line" and "end of line" +constructs match immediately following or immediately before internal newlines +in the subject string, respectively, as well as at the very start and end. This +is equivalent to Perl's /m option, and it can be changed within a pattern by a +(?m) option setting. Note that the "start of line" metacharacter does not match +after a newline at the end of the subject, for compatibility with Perl. +However, you can change this by setting the PCRE2_ALT_CIRCUMFLEX option. If +there are no newlines in a subject string, or no occurrences of ^ or $ in a +pattern, setting PCRE2_MULTILINE has no effect. 
+.sp + PCRE2_NEVER_BACKSLASH_C +.sp +This option locks out the use of \eC in the pattern that is being compiled. +This escape can cause unpredictable behaviour in UTF-8 or UTF-16 modes, because +it may leave the current matching point in the middle of a multi-code-unit +character. This option may be useful in applications that process patterns from +external sources. Note that there is also a build-time option that permanently +locks out the use of \eC. +.sp + PCRE2_NEVER_UCP +.sp +This option locks out the use of Unicode properties for handling \eB, \eb, \eD, +\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described +for the PCRE2_UCP option below. In particular, it prevents the creator of the +pattern from enabling this facility by starting the pattern with (*UCP). This +option may be useful in applications that process patterns from external +sources. The option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. +.sp + PCRE2_NEVER_UTF +.sp +This option locks out interpretation of the pattern as UTF-8, UTF-16, or +UTF-32, depending on which library is in use. In particular, it prevents the +creator of the pattern from switching to UTF interpretation by starting the +pattern with (*UTF). This option may be useful in applications that process +patterns from external sources. The combination of PCRE2_UTF and +PCRE2_NEVER_UTF causes an error. +.sp + PCRE2_NO_AUTO_CAPTURE +.sp +If this option is set, it disables the use of numbered capturing parentheses in +the pattern. Any opening parenthesis that is not followed by ? behaves as if it +were followed by ?: but named parentheses can still be used for capturing (and +they acquire numbers in the usual way). This is the same as Perl's /n option. +Note that, when this option is set, references to capture groups +(backreferences or recursion/subroutine calls) may only refer to named groups, +though the reference can be by name or by number. 
+.sp + PCRE2_NO_AUTO_POSSESS +.sp +If this (deprecated) option is set, it disables "auto-possessification", which +is an optimization that, for example, turns a+b into a++b in order to avoid +backtracks into a+ that can never be successful. However, if callouts are in +use, auto-possessification means that some callouts are never taken. You can +set this option if you want the matching functions to do a full unoptimized +search and run all the callouts, but it is mainly provided for testing +purposes. +.P +If a compile context is available, it is recommended to use +\fBpcre2_set_optimize()\fP with the \fIdirective\fP PCRE2_AUTO_POSSESS_OFF rather +than the compile option PCRE2_NO_AUTO_POSSESS. Note that PCRE2_NO_AUTO_POSSESS +takes precedence over the \fBpcre2_set_optimize()\fP optimization directives +PCRE2_AUTO_POSSESS and PCRE2_AUTO_POSSESS_OFF. +.sp + PCRE2_NO_DOTSTAR_ANCHOR +.sp +If this (deprecated) option is set, it disables an optimization that is applied +when .* is the first significant item in a top-level branch of a pattern, and +all the other branches also start with .* or with \eA or \eG or ^. The +optimization is automatically disabled for .* if it is inside an atomic group +or a capture group that is the subject of a backreference, or if the pattern +contains (*PRUNE) or (*SKIP). When the optimization is not disabled, such a +pattern is automatically anchored if PCRE2_DOTALL is set for all the .* items +and PCRE2_MULTILINE is not set for any ^ items. Otherwise, the fact that any +match must start either at the start of the subject or following a newline is +remembered. Like other optimizations, this can cause callouts to be skipped. +(If a compile context is available, it is recommended to use +\fBpcre2_set_optimize()\fP with the \fIdirective\fP PCRE2_DOTSTAR_ANCHOR_OFF +instead.) +.sp + PCRE2_NO_START_OPTIMIZE +.sp +This is an option whose main effect is at matching time. 
It does not change +what \fBpcre2_compile()\fP generates, but it does affect the output of the JIT +compiler. Setting this option is equivalent to calling \fBpcre2_set_optimize()\fP +with the \fIdirective\fP parameter set to PCRE2_START_OPTIMIZE_OFF. +.P +There are a number of optimizations that may occur at the start of a match, in +order to speed up the process. For example, if it is known that an unanchored +match must start with a specific code unit value, the matching code searches +the subject for that value, and fails immediately if it cannot find it, without +actually running the main matching function. The start-up optimizations are +in effect a pre-scan of the subject that takes place before the pattern is run. +.P +Disabling the start-up optimizations may cause performance to suffer. However, +this may be desirable for patterns which contain callouts or items such as +(*COMMIT) and (*MARK). See the above description of PCRE2_START_OPTIMIZE_OFF +for further details. +.sp + PCRE2_NO_UTF_CHECK +.sp +When PCRE2_UTF is set, the validity of the pattern as a UTF string is +automatically checked. There are discussions about the validity of +.\" HTML +.\" +UTF-8 strings, +.\" +.\" HTML +.\" +UTF-16 strings, +.\" +and +.\" HTML +.\" +UTF-32 strings +.\" +in the +.\" HREF +\fBpcre2unicode\fP +.\" +document. If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a +negative error code. +.P +If you know that your pattern is a valid UTF string, and you want to skip this +check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When +it is set, the effect of passing an invalid UTF string as a pattern is +undefined. It may cause your program to crash or loop. +.P +Note that this option can also be passed to \fBpcre2_match()\fP and +\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject +string. 
+.P +Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the +error that is given if an escape sequence for an invalid Unicode code point is +encountered in the pattern. In particular, the so-called "surrogate" code +points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences +such as \ex{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option, as described in the section entitled "Extra compile options" +.\" HTML +.\" +below. +.\" +However, this is possible only in UTF-8 and UTF-32 modes, because these values +are not representable in UTF-16. +.sp + PCRE2_UCP +.sp +This option has two effects. Firstly, it changes the way PCRE2 processes \eB, +\eb, \eD, \ed, \eS, \es, \eW, \ew, and some of the POSIX character classes. By +default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode +properties are used to classify characters. There are some PCRE2_EXTRA +options (see below) that add finer control to this behaviour. More details are +given in the section on +.\" HTML +.\" +generic character types +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. +.P +The second effect of PCRE2_UCP is to force the use of Unicode properties for +upper/lower casing operations, even when PCRE2_UTF is not set. This makes it +possible to process strings in the 16-bit UCS-2 code. This option is available +only if PCRE2 has been compiled with Unicode support (which is the default). +.P +The PCRE2_EXTRA_CASELESS_RESTRICT option (see above) restricts caseless +matching such that ASCII characters match only ASCII characters and non-ASCII +characters match only non-ASCII characters. The PCRE2_EXTRA_TURKISH_CASING option +(see above) alters the matching of the 'i' characters to follow their behaviour +in Turkish and Azeri languages. For further details on +PCRE2_EXTRA_CASELESS_RESTRICT and PCRE2_EXTRA_TURKISH_CASING, see the +.\" HREF +\fBpcre2unicode\fP +.\" +page. 
+.sp + PCRE2_UNGREEDY +.sp +This option inverts the "greediness" of the quantifiers so that they are not +greedy by default, but become greedy if followed by "?". It is not compatible +with Perl. It can also be set by a (?U) option setting within the pattern. +.sp + PCRE2_USE_OFFSET_LIMIT +.sp +This option must be set for \fBpcre2_compile()\fP if +\fBpcre2_set_offset_limit()\fP is going to be used to set a non-default offset +limit in a match context for matches that use this pattern. An error is +generated if an offset limit is set without this option. For more details, see +the description of \fBpcre2_set_offset_limit()\fP in the +.\" HTML +.\" +section +.\" +that describes match contexts. See also the PCRE2_FIRSTLINE +option above. +.sp + PCRE2_UTF +.sp +This option causes PCRE2 to regard both the pattern and the subject strings +that are subsequently processed as strings of UTF characters instead of +single-code-unit strings. It is available when PCRE2 is built to include +Unicode support (which is the default). If Unicode support is not available, +the use of this option provokes an error. Details of how PCRE2_UTF changes the +behaviour of PCRE2 are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. In particular, note that it changes the way PCRE2_CASELESS works. +. +. +.\" HTML +.SS "Extra compile options" +.rs +.sp +The option bits that can be set in a compile context by calling the +\fBpcre2_set_compile_extra_options()\fP function are as follows: +.sp + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +.sp +Since release 10.38 PCRE2 has forbidden the use of \eK within lookaround +assertions, following Perl's lead. This option is provided to re-enable the +previous behaviour (act in positive lookarounds, ignore in negative ones) in +case anybody is relying on it. +.sp + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES +.sp +This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is +forbidden in UTF-16 mode, and ignored in non-UTF modes. 
Unicode "surrogate" +code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode +code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot +therefore be represented in UTF-16. They can be represented in UTF-8 and +UTF-32, but are defined as invalid code points, and cause errors if encountered +in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2. +.P +These values also cause errors if encountered in escape sequences such as +\ex{d912} within a pattern. However, it seems that some applications, when +using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test +for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does +not disable the error that occurs, because it applies only to the testing of +input strings for UTF validity. +.P +If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code +point values in UTF-8 and UTF-32 patterns no longer provoke errors and are +incorporated in the compiled pattern. However, they can only match subject +characters if the matching function is called with PCRE2_NO_UTF_CHECK set. +.sp + PCRE2_EXTRA_ALT_BSUX +.sp +The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in +the way that ECMAscript (aka JavaScript) does. Additional functionality was +defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of +PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal +character code, where hhh.. is any number of hexadecimal digits. +.sp + PCRE2_EXTRA_ASCII_BSD +.sp +This option forces \ed to match only ASCII digits, even when PCRE2_UCP is set. +It can be changed within a pattern by means of the (?aD) option setting. +.sp + PCRE2_EXTRA_ASCII_BSS +.sp +This option forces \es to match only ASCII space characters, even when +PCRE2_UCP is set. It can be changed within a pattern by means of the (?aS) +option setting. 
+.sp + PCRE2_EXTRA_ASCII_BSW +.sp +This option forces \ew to match only ASCII word characters, even when PCRE2_UCP +is set. It can be changed within a pattern by means of the (?aW) option +setting. +.sp + PCRE2_EXTRA_ASCII_DIGIT +.sp +This option forces the POSIX character classes [:digit:] and [:xdigit:] to +match only ASCII digits, even when PCRE2_UCP is set. It can be changed within +a pattern by means of the (?aT) option setting. +.sp + PCRE2_EXTRA_ASCII_POSIX +.sp +This option forces all the POSIX character classes, including [:digit:] and +[:xdigit:], to match only ASCII characters, even when PCRE2_UCP is set. It can +be changed within a pattern by means of the (?aP) option setting, but note that +this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets +all ASCII restrictions for POSIX classes. +.sp + PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL +.sp +This is a dangerous option. Use with care. By default, an unrecognized escape +such as \ej or a malformed one such as \ex{2z} causes a compile-time error when +detected by \fBpcre2_compile()\fP. Perl is somewhat inconsistent in handling +such items: for example, \ej is treated as a literal "j", and non-hexadecimal +digits in \ex{} are just ignored, though warnings are given in both cases if +Perl's warning switch is enabled. However, a malformed octal number after \eo{ +always causes an error in Perl. +.P +If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to +\fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are +treated as single-character escapes. For example, \ej is a literal "j" and +\ex{2z} is treated as the literal string "x{2z}". Setting this option means +that typos in patterns may go undetected and have unexpected results. 
Also note +that a sequence such as [\eN{] is interpreted as a malformed attempt at +[\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an +unqualified \eN is a valid escape sequence but is not supported in a character +class. To reiterate: this is a dangerous option. Use with great care. +.sp + PCRE2_EXTRA_CASELESS_RESTRICT +.sp +When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode +rules, which allow for more than two cases per character. There are two +case-equivalent character sets that contain both ASCII and non-ASCII +characters. The ASCII letter S is case-equivalent to U+017f (long S) and the +ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables +recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a +caseless match, both characters must either be ASCII or non-ASCII. The option +can be changed within a pattern by the (*CASELESS_RESTRICT) or (?r) option +settings. +.sp + PCRE2_EXTRA_ESCAPED_CR_IS_LF +.sp +There are some legacy applications where the escape sequence \er in a pattern +is expected to match a newline. If this option is set, \er in a pattern is +converted to \en so that it matches a LF (linefeed) instead of a CR (carriage +return) character. The option does not affect a literal CR in the pattern, nor +does it affect CR specified as an explicit code point such as \ex{0D}. +.sp + PCRE2_EXTRA_MATCH_LINE +.sp +This option is provided for use by the \fB-x\fP option of \fBpcre2grep\fP. It +causes the pattern only to match complete lines. This is achieved by +automatically inserting the code for "^(?:" at the start of the compiled +pattern and ")$" at the end. Thus, when PCRE2_MULTILINE is set, the matched +line may be in the middle of the subject string. This option can be used with +PCRE2_LITERAL. +.sp + PCRE2_EXTRA_MATCH_WORD +.sp +This option is provided for use by the \fB-w\fP option of \fBpcre2grep\fP. 
It +causes the pattern only to match strings that have a word boundary at the start +and the end. This is achieved by automatically inserting the code for "\eb(?:" +at the start of the compiled pattern and ")\eb" at the end. The option may be +used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is +also set. +.sp + PCRE2_EXTRA_NO_BS0 +.sp +If this option is set (note that its final character is the digit 0) it locks +out the use of the sequence \e0 unless at least one more octal digit follows. +.sp + PCRE2_EXTRA_PYTHON_OCTAL +.sp +If this option is set, PCRE2 follows Python's rules for interpreting octal +escape sequences. The rules for handling sequences such as \e14, which could +be an octal number or a back reference, are different. Details are given in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.sp + PCRE2_EXTRA_NEVER_CALLOUT +.sp +If this option is set, PCRE2 treats callouts in the pattern as a syntax error, +returning PCRE2_ERROR_CALLOUT_CALLER_DISABLED. This is useful if the application +knows that a callout will not be provided to \fBpcre2_match()\fP, so that +callouts in the pattern are not silently ignored. +.sp + PCRE2_EXTRA_TURKISH_CASING +.sp +This option alters case-equivalence of the 'i' letters to follow the +alphabet used by Turkish and Azeri languages. The option can be changed within +a pattern by the (*TURKISH_CASING) start-of-pattern setting. Either the UTF or +the UCP option must be set. In the 8-bit library, UTF must be set. This option +cannot be combined with PCRE2_EXTRA_CASELESS_RESTRICT. +. +.
+.\" HTML +.SH "JUST-IN-TIME (JIT) COMPILATION" +.rs +.sp +.nf +.B int pcre2_jit_compile(pcre2_code *\fIcode\fP, uint32_t \fIoptions\fP); +.sp +.B int pcre2_jit_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.sp +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.sp +.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP, +.B " size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP, +.B " pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);" +.sp +.B void pcre2_jit_stack_free(pcre2_jit_stack *\fIjit_stack\fP); +.fi +.P +These functions provide support for JIT compilation, which, if the just-in-time +compiler is available, further processes a compiled pattern into machine code +that executes much faster than the \fBpcre2_match()\fP interpretive matching +function. Full details are given in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. +.P +JIT compilation is a heavyweight optimization. It can take some time for +patterns to be analyzed, and for one-off matches and simple patterns the +benefit of faster execution might be offset by a much slower compilation time. +Most (but not all) patterns can be optimized by the JIT compiler. +. +. +.\" HTML +.SH "LOCALE SUPPORT" +.rs +.sp +.nf +.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); +.sp +.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP, +.B " const uint8_t *\fItables\fP);" +.fi +.P +PCRE2 handles caseless matching, and determines whether characters are letters, +digits, or whatever, by reference to a set of tables, indexed by character code +point. However, this applies only to characters whose code points are less than +256. 
By default, higher-valued code points never match escapes such as \ew or +\ed. +.P +When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \ep and \eP, or, alternatively, the +PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and +friends to use Unicode property support instead of the built-in tables. +PCRE2_UCP also causes upper/lower casing operations on characters with code +points greater than 127 to use Unicode properties. These effects apply even +when PCRE2_UTF is not set. There are, however, some PCRE2_EXTRA options (see +above) that can be used to modify or suppress them. +.P +The use of locales with Unicode is discouraged. If you are handling characters +with code points greater than 127, you should either use Unicode support, or +use locales, but not try to mix the two. +.P +PCRE2 contains a built-in set of character tables that are used by default. +These are sufficient for many applications. Normally, the internal tables +recognize only ASCII characters. However, when PCRE2 is built, it is possible +to cause the internal tables to be rebuilt in the default "C" locale of the +local system, which may cause them to be different. +.P +The built-in tables can be overridden by tables supplied by the application +that calls PCRE2. These may be created in a different locale from the default. +As more and more applications change to using Unicode, the need for this locale +support is expected to die away. +.P +External tables are built by calling the \fBpcre2_maketables()\fP function, in +the relevant locale. The only argument to this function is a general context, +which can be used to pass a custom memory allocator. If the argument is NULL, +the system \fBmalloc()\fP is used. The result can be passed to +\fBpcre2_compile()\fP as often as necessary, by creating a compile context and +calling \fBpcre2_set_character_tables()\fP to set the tables pointer therein. 
+.P +For example, to build and use tables that are appropriate for the French locale +(where accented characters with values greater than 127 are treated as +letters), the following code could be used: +.sp + setlocale(LC_CTYPE, "fr_FR"); + tables = pcre2_maketables(NULL); + ccontext = pcre2_compile_context_create(NULL); + pcre2_set_character_tables(ccontext, tables); + re = pcre2_compile(..., ccontext); +.sp +The locale name "fr_FR" is used on Linux and other Unix-like systems; if you +are using Windows, the name for the French locale is "french". +.P +The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP +is saved with the compiled pattern, and the same tables are used by the +matching functions. Thus, for any single pattern, compilation and matching both +happen in the same locale, but different patterns can be processed in different +locales. +.P +It is the caller's responsibility to ensure that the memory containing the +tables remains available while they are still in use. When they are no longer +needed, you can discard them using \fBpcre2_maketables_free()\fP, which should +be passed as its first parameter the same general context that was used to +create the tables. +. +. +.SS "Saving locale tables" +.rs +.sp +The tables described above are just a sequence of binary bytes, which makes +them independent of hardware characteristics such as endianness or whether the +processor is 32-bit or 64-bit. A copy of the result of \fBpcre2_maketables()\fP +can therefore be saved in a file or elsewhere and re-used later, even in a +different program or on another computer. The size of the tables (number of +bytes) must be obtained by calling \fBpcre2_config()\fP with the +PCRE2_CONFIG_TABLES_LENGTH option because \fBpcre2_maketables()\fP does not +return this value. Note that the \fBpcre2_dftables\fP program, which is part of +the PCRE2 build system, can be used stand-alone to create a file that contains +a set of binary tables.
See the +.\" HTML +.\" +\fBpcre2build\fP +.\" +documentation for details. +. +. +.\" HTML +.SH "INFORMATION ABOUT A COMPILED PATTERN" +.rs +.sp +.nf +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP); +.fi +.P +The \fBpcre2_pattern_info()\fP function returns general information about a +compiled pattern. For information about callouts, see the +.\" HTML +.\" +next section. +.\" +The first argument for \fBpcre2_pattern_info()\fP is a pointer to the compiled +pattern. The second argument specifies which piece of information is required, +and the third argument is a pointer to a variable to receive the data. If the +third argument is NULL, the first argument is ignored, and the function returns +the size in bytes of the variable that is required for the information +requested. Otherwise, the yield of the function is zero for success, or one of +the following negative numbers: +.sp + PCRE2_ERROR_NULL the argument \fIcode\fP was NULL + PCRE2_ERROR_BADMAGIC the "magic number" was not found + PCRE2_ERROR_BADOPTION the value of \fIwhat\fP was invalid + PCRE2_ERROR_UNSET the requested field is not set +.sp +The "magic number" is placed at the start of each compiled pattern as a simple +check against passing an arbitrary memory pointer. Here is a typical call of +\fBpcre2_pattern_info()\fP, to obtain the length of the compiled pattern: +.sp + int rc; + size_t length; + rc = pcre2_pattern_info( + re, /* result of pcre2_compile() */ + PCRE2_INFO_SIZE, /* what is required */ + &length); /* where to put the data */ +.sp +The possible values for the second argument are defined in \fBpcre2.h\fP, and +are as follows: +.sp + PCRE2_INFO_ALLOPTIONS + PCRE2_INFO_ARGOPTIONS + PCRE2_INFO_EXTRAOPTIONS +.sp +Return copies of the pattern's options. The third argument should point to a +\fBuint32_t\fP variable.
PCRE2_INFO_ARGOPTIONS returns exactly the options that +were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns +the compile options as modified by any top-level (*XXX) option settings such as +(*UTF) at the start of the pattern itself. PCRE2_INFO_EXTRAOPTIONS returns the +extra options that were set in the compile context by calling the +pcre2_set_compile_extra_options() function. +.P +For example, if the pattern /(*UTF)abc/ is compiled with the PCRE2_EXTENDED +option, the result for PCRE2_INFO_ALLOPTIONS is PCRE2_EXTENDED and PCRE2_UTF. +Option settings such as (?i) that can change within a pattern do not affect the +result of PCRE2_INFO_ALLOPTIONS, even if they appear right at the start of the +pattern. (This was different in some earlier releases.) +.P +A pattern compiled without PCRE2_ANCHORED is automatically anchored by PCRE2 if +the first significant item in every top-level branch is one of the following: +.sp + ^ unless PCRE2_MULTILINE is set + \eA always + \eG always + .* sometimes - see below +.sp +When .* is the first significant item, anchoring is possible only when all the +following are true: +.sp + .* is not in an atomic group +.\" JOIN + .* is not in a capture group that is the subject + of a backreference + PCRE2_DOTALL is in force for .* + Neither (*PRUNE) nor (*SKIP) appears in the pattern + PCRE2_NO_DOTSTAR_ANCHOR is not set + Dotstar anchoring has not been disabled with PCRE2_DOTSTAR_ANCHOR_OFF +.sp +For patterns that are auto-anchored, the PCRE2_ANCHORED bit is set in the +options returned for PCRE2_INFO_ALLOPTIONS. +.sp + PCRE2_INFO_BACKREFMAX +.sp +Return the number of the highest backreference in the pattern. The third +argument should point to a \fBuint32_t\fP variable. Named capture groups +acquire numbers as well as names, and these count towards the highest +backreference. 
Backreferences such as \e4 or \eg{12} match the captured +characters of the given group, but in addition, the check that a capture +group is set in a conditional group such as (?(3)a|b) is also a backreference. +Zero is returned if there are no backreferences. +.sp + PCRE2_INFO_BSR +.sp +The output is a uint32_t integer whose value indicates what character sequences +the \eR escape sequence matches. A value of PCRE2_BSR_UNICODE means that \eR +matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means +that \eR matches only CR, LF, or CRLF. +.sp + PCRE2_INFO_CAPTURECOUNT +.sp +Return the highest capture group number in the pattern. In patterns where (?| +is not used, this is also the total number of capture groups. The third +argument should point to a \fBuint32_t\fP variable. +.sp + PCRE2_INFO_DEPTHLIMIT +.sp +If the pattern set a backtracking depth limit by including an item of the form +(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +.sp + PCRE2_INFO_FIRSTBITMAP +.sp +In the absence of a single first code unit for a non-anchored pattern, +\fBpcre2_compile()\fP may construct a 256-bit table that defines a fixed set of +values for the first code unit in any match. For example, a pattern that starts +with [abc] results in a table with three bits set. When code unit values +greater than 255 are supported, the flag bit for 255 means "any code unit of +value 255 or above". If such a table was constructed, a pointer to it is +returned. Otherwise NULL is returned. The third argument should point to a +\fBconst uint8_t *\fP variable. 
+.sp + PCRE2_INFO_FIRSTCODETYPE +.sp +Return information about the first code unit of any matched string, for a +non-anchored pattern. The third argument should point to a \fBuint32_t\fP +variable. If there is a fixed first value, for example, the letter "c" from a +pattern such as (cat|cow|coyote), 1 is returned, and the value can be retrieved +using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is +known that a match can occur only at the start of the subject or following a +newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0 +is returned. +.sp + PCRE2_INFO_FIRSTCODEUNIT +.sp +Return the value of the first code unit of any matched string for a pattern +where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third +argument should point to a \fBuint32_t\fP variable. In the 8-bit library, the +value is always less than 256. In the 16-bit library the value can be up to +0xffff. In the 32-bit library in UTF-32 mode the value can be up to 0x10ffff, +and up to 0xffffffff when not using UTF-32 mode. +.sp + PCRE2_INFO_FRAMESIZE +.sp +Return the size (in bytes) of the data frames that are used to remember +backtracking positions when the pattern is processed by \fBpcre2_match()\fP +without the use of JIT. The third argument should point to a \fBsize_t\fP +variable. The frame size depends on the number of capturing parentheses in the +pattern. Each additional capture group adds two PCRE2_SIZE variables. +.sp + PCRE2_INFO_HASBACKSLASHC +.sp +Return 1 if the pattern contains any instances of \eC, otherwise 0. The third +argument should point to a \fBuint32_t\fP variable. +.sp + PCRE2_INFO_HASCRORLF +.sp +Return 1 if the pattern contains any explicit matches for CR or LF characters, +otherwise 0. The third argument should point to a \fBuint32_t\fP variable. An +explicit match is either a literal CR or LF character, or \er or \en or one of +the equivalent hexadecimal or octal escape sequences. 
+.sp + PCRE2_INFO_HEAPLIMIT +.sp +If the pattern set a heap memory limit by including an item of the form +(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +.sp + PCRE2_INFO_JCHANGED +.sp +Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise +0. The third argument should point to a \fBuint32_t\fP variable. (?J) and +(?-J) set and unset the local PCRE2_DUPNAMES option, respectively. +.sp + PCRE2_INFO_JITSIZE +.sp +If the compiled pattern was successfully processed by +\fBpcre2_jit_compile()\fP, return the size of the JIT compiled code, otherwise +return zero. The third argument should point to a \fBsize_t\fP variable. +.sp + PCRE2_INFO_LASTCODETYPE +.sp +Returns 1 if there is a rightmost literal code unit that must exist in any +matched string, other than at its start. The third argument should point to a +\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is +returned, the code unit value itself can be retrieved using +PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is +recorded only if it follows something of variable length. For example, for the +pattern /^a\ed+z\ed+/ the returned value is 1 (with "z" returned from +PCRE2_INFO_LASTCODEUNIT), but for /^a\edz\ed/ the returned value is 0. +.sp + PCRE2_INFO_LASTCODEUNIT +.sp +Return the value of the rightmost literal code unit that must exist in any +matched string, other than at its start, for a pattern where +PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argument +should point to a \fBuint32_t\fP variable. +.sp + PCRE2_INFO_MATCHEMPTY +.sp +Return 1 if the pattern might match an empty string, otherwise 0. 
The third +argument should point to a \fBuint32_t\fP variable. When a pattern contains +recursive subroutine calls it is not always possible to determine whether or +not it can match an empty string. PCRE2 takes a cautious approach and returns 1 +in such cases. +.sp + PCRE2_INFO_MATCHLIMIT +.sp +If the pattern set a match limit by including an item of the form +(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument +should point to a uint32_t integer. If no such value has been set, the call to +\fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET. Note that this +limit will only be used during matching if it is less than the limit set or +defaulted by the caller of the match function. +.sp + PCRE2_INFO_MAXLOOKBEHIND +.sp +A lookbehind assertion moves back a certain number of characters (not code +units) when it starts to process each of its branches. This request returns the +largest of these backward moves. The third argument should point to a uint32_t +integer. The simple assertions \eb and \eB require a one-character lookbehind +and cause PCRE2_INFO_MAXLOOKBEHIND to return 1 in the absence of anything +longer. \eA also registers a one-character lookbehind, though it does not +actually inspect the previous character. +.P +Note that this information is useful for multi-segment matching only +if the pattern contains no nested lookbehinds. For example, the pattern +(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the +first lookbehind moves back by two characters, matches one character, then the +nested lookbehind also moves back by two characters. This puts the matching +point three characters earlier than it was at the start. +PCRE2_INFO_MAXLOOKBEHIND is really only useful as a debugging tool. See the +.\" HREF +\fBpcre2partial\fP +.\" +documentation for a discussion of multi-segment matching. 
+.sp + PCRE2_INFO_MINLENGTH +.sp +If a minimum length for matching subject strings was computed, its value is +returned. Otherwise the returned value is 0. This value is not computed when +PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in +UTF mode may be different from the number of code units. The third argument +should point to a \fBuint32_t\fP variable. The value is a lower bound to the +length of any matching string. There may not be any strings of that length that +do actually match, but every string that does match is at least that long. +.sp + PCRE2_INFO_NAMECOUNT + PCRE2_INFO_NAMEENTRYSIZE + PCRE2_INFO_NAMETABLE +.sp +PCRE2 supports the use of named as well as numbered capturing parentheses. The +names are just an additional way of identifying the parentheses, which still +acquire numbers. Several convenience functions such as +\fBpcre2_substring_get_byname()\fP are provided for extracting captured +substrings by name. It is also possible to extract the data directly, by first +converting the name to a number in order to access the correct pointers in the +output vector (described with \fBpcre2_match()\fP below). To do the conversion, +you need to use the name-to-number map, which is described by these three +values. +.P +The map consists of a number of fixed-size entries. PCRE2_INFO_NAMECOUNT gives +the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives the size of each +entry in code units; both of these return a \fBuint32_t\fP value. The entry +size depends on the length of the longest name. +.P +PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table. This is +a PCRE2_SPTR pointer to a block of code units. In the 8-bit library, the first +two bytes of each entry are the number of the capturing parenthesis, most +significant byte first. In the 16-bit library, the pointer points to 16-bit +code units, the first of which contains the parenthesis number. 
In the 32-bit +library, the pointer points to 32-bit code units, the first of which contains +the parenthesis number. The rest of the entry is the corresponding name, zero +terminated. +.P +The names are in alphabetical order. If (?| is used to create multiple capture +groups with the same number, as described in the +.\" HTML +.\" +section on duplicate group numbers +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page, the groups may be given the same name, but there is only one entry in the +table. Different names for groups of the same number are not permitted. +.P +Duplicate names for capture groups with different numbers are permitted, but +only if PCRE2_DUPNAMES is set. They appear in the table in the order in which +they were found in the pattern. In the absence of (?| this is the order of +increasing number; when (?| is used this is not necessarily the case because +later capture groups may have lower numbers. +.P +As a simple example of the name/number table, consider the following pattern +after compilation by the 8-bit library (assume PCRE2_EXTENDED is set, so white +space - including newlines - is ignored): +.sp +.\" JOIN + (?<date> (?<year>(\ed\ed)?\ed\ed) - + (?<month>\ed\ed) - (?<day>\ed\ed) ) +.sp +There are four named capture groups, so the table has four entries, and each +entry in the table is eight bytes long. The table is as follows, with +non-printing bytes shown in hexadecimal, and undefined bytes shown as ??: +.sp + 00 01 d a t e 00 ?? + 00 05 d a y 00 ?? ?? + 00 04 m o n t h 00 + 00 02 y e a r 00 ?? +.sp +When writing code to extract data from named capture groups using the +name-to-number map, remember that the length of the entries is likely to be +different for each compiled pattern.
+.sp + PCRE2_INFO_NEWLINE +.sp +The output is one of the following \fBuint32_t\fP values: +.sp + PCRE2_NEWLINE_CR Carriage return (CR) + PCRE2_NEWLINE_LF Linefeed (LF) + PCRE2_NEWLINE_CRLF Carriage return, linefeed (CRLF) + PCRE2_NEWLINE_ANY Any Unicode line ending + PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF + PCRE2_NEWLINE_NUL The NUL character (binary zero) +.sp +This identifies the character sequence that will be recognized as meaning +"newline" while matching. +.sp + PCRE2_INFO_SIZE +.sp +Return the size of the compiled pattern in bytes (for all three libraries). The +third argument should point to a \fBsize_t\fP variable. This value includes the +size of the general data block that precedes the code units of the compiled +pattern itself. The value that is used when \fBpcre2_compile()\fP is getting +memory in which to place the compiled pattern may be slightly larger than the +value returned by this option, because there are cases where the code that +calculates the size has to over-estimate. Processing a pattern with the JIT +compiler does not alter the value returned by this option. +. +. +.\" HTML +.SH "INFORMATION ABOUT A PATTERN'S CALLOUTS" +.rs +.sp +.nf +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.fi +.sp +A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling \fBpcre2_callout_enumerate()\fP. The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +\fIuser_data\fP value that was passed to \fBpcre2_callout_enumerate()\fP. 
The +contents of the callout enumeration block are described in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation, which also gives further details about callouts. +. +. +.SH "SERIALIZATION AND PRECOMPILING" +.rs +.sp +It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. The host on which the patterns are +reloaded must be running the same version of PCRE2, with the same code unit +width, and must also have the same endianness, pointer width, and PCRE2_SIZE +type. Before compiled patterns can be saved, they must be converted to a +"serialized" form, which in the case of PCRE2 is really just a bytecode dump. +The functions whose names begin with \fBpcre2_serialize_\fP are used for +converting to and from the serialized form. They are described in the +.\" HREF +\fBpcre2serialize\fP +.\" +documentation. Note that PCRE2 serialization does not convert compiled patterns +to an abstract format like Java or .NET serialization. +. +. +.\" HTML +.SH "THE MATCH DATA BLOCK" +.rs +.sp +.nf +.B pcre2_match_data *pcre2_match_data_create(uint32_t \fIovecsize\fP, +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_match_data *pcre2_match_data_create_from_pattern( +.B " const pcre2_code *\fIcode\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); +.fi +.P +Information about a successful or unsuccessful match is placed in a match +data block, which is an opaque structure that is accessed by function calls. In +particular, the match data block contains a vector of offsets into the subject +string that define the matched parts of the subject. This is known as the +\fIovector\fP. +.P +Before calling \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or +\fBpcre2_jit_match()\fP you must create a match data block by calling one of +the creation functions above. 
For \fBpcre2_match_data_create()\fP, the first +argument is the number of pairs of offsets in the \fIovector\fP. +.P +When using \fBpcre2_match()\fP, one pair of offsets is required to identify the +string that matched the whole pattern, with an additional pair for each +captured substring. For example, a value of 4 creates enough space to record +the matched portion of the subject plus three captured substrings. +.P +When using \fBpcre2_dfa_match()\fP there may be multiple matched substrings of +different lengths at the same point in the subject. The ovector should be made +large enough to hold as many as are expected. +.P +A minimum of 1 pair is imposed by \fBpcre2_match_data_create()\fP, so +it is always possible to return the overall matched string in the case of +\fBpcre2_match()\fP or the longest match in the case of +\fBpcre2_dfa_match()\fP. The maximum number of pairs is 65535; if the first +argument of \fBpcre2_match_data_create()\fP is greater than this, 65535 is +used. +.P +The second argument of \fBpcre2_match_data_create()\fP is a pointer to a +general context, which can specify custom memory management for obtaining the +memory for the match data block. If you are not using custom memory management, +pass NULL, which causes \fBmalloc()\fP to be used. +.P +For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a +pointer to a compiled pattern. The ovector is created to be exactly the right +size to hold all the substrings a pattern might capture when matched using +\fBpcre2_match()\fP. You should not use this call when matching with +\fBpcre2_dfa_match()\fP. The second argument is again a pointer to a general +context, but in this case if NULL is passed, the memory is obtained using the +same allocator that was used for the compiled pattern (custom or default). +.P +A match data block can be used many times, with the same or different compiled +patterns.
You can extract information from a match data block after a match +operation has finished, using functions that are described in the sections on +.\" HTML +.\" +matched strings +.\" +and +.\" HTML +.\" +other match data +.\" +below. +.P +When a call of \fBpcre2_match()\fP fails, valid data is available in the match +block only when the error is PCRE2_ERROR_NOMATCH, PCRE2_ERROR_PARTIAL, or one +of the error codes for an invalid UTF string. Exactly what is available depends +on the error, and is detailed below. +.P +When one of the matching functions is called, pointers to the compiled pattern +and the subject string are set in the match data block so that they can be +referenced by the extraction functions after a successful match. After running +a match, you must not free a compiled pattern or a subject string until after +all operations on the match data block (for that match) have taken place, +unless, in the case of the subject string, you have used the +PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled +"Option bits for \fBpcre2_match()\fP" +.\" HTML +.\" +below. +.\" +.P +When a match data block itself is no longer needed, it should be freed by +calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL +argument, it returns immediately, without doing anything. +. +. +.SH "MEMORY USE FOR MATCH DATA BLOCKS" +.rs +.sp +.nf +.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_match_data_heapframes_size( +.B " pcre2_match_data *\fImatch_data\fP);" +.fi +.P +The size of a match data block depends on the size of the ovector that it +contains. The function \fBpcre2_get_match_data_size()\fP returns the size, in +bytes, of the block that is its argument. +.P +When \fBpcre2_match()\fP runs interpretively (that is, without using JIT), it +makes use of a vector of data frames for remembering backtracking positions. 
+The size of each individual frame depends on the number of capturing +parentheses in the pattern and can be obtained by calling +\fBpcre2_pattern_info()\fP with the PCRE2_INFO_FRAMESIZE option (see the +section entitled "Information about a compiled pattern" +.\" HTML +.\" +above). +.\" +.P +Heap memory is used for the frames vector; if the initial memory block turns +out to be too small during matching, it is automatically expanded. When +\fBpcre2_match()\fP returns, the memory is not freed, but remains attached to +the match data block, for use by any subsequent matches that use the same +block. It is automatically freed when the match data block itself is freed. +.P +You can find the current size of the frames vector that a match data block owns +by calling \fBpcre2_get_match_data_heapframes_size()\fP. For a newly created +match data block the size will be zero. Some types of match may require a lot +of frames and thus a large vector; applications that run in environments where +memory is constrained can check this and free the match data block if the heap +frames vector has become too big. +. +. +.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION" +.rs +.sp +.nf +.B int pcre2_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP);" +.fi +.P +The function \fBpcre2_match()\fP is called to match a subject string against a +compiled pattern, which is passed in the \fIcode\fP argument. You can call +\fBpcre2_match()\fP with the same \fIcode\fP argument as many times as you +like, in order to find multiple matches in the subject string or to match +different subject strings with the same pattern. +.P +This function is the main matching facility of the library, and it operates in +a Perl-like manner. 
For specialist use there is also an alternative matching +function, which is described +.\" HTML +.\" +below +.\" +in the section about the \fBpcre2_dfa_match()\fP function. +.P +Here is an example of a simple call to \fBpcre2_match()\fP: +.sp + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL); /* a match context; NULL means use defaults */ +.sp +If the subject string is zero-terminated, the length can be given as +PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common +matching parameters are to be changed. For details, see the section on +.\" HTML +.\" +the match context +.\" +above. +. +. +.SS "The string to be matched by \fBpcre2_match()\fP" +.rs +.sp +The subject string is passed to \fBpcre2_match()\fP as a pointer in +\fIsubject\fP, a length in \fIlength\fP, and a starting offset in +\fIstartoffset\fP. The length and offset are in code units, not characters. +That is, they are in bytes for the 8-bit library, 16-bit code units for the +16-bit library, and 32-bit code units for the 32-bit library, whether or not +UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and +\fIlength\fP is zero, the subject is assumed to be an empty string. If +\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL. +.P +If \fIstartoffset\fP is greater than the length of the subject, +\fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is +zero, the search for a match starts at the beginning of the subject, and this +is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset +must point to the start of a character, or to the end of the subject (in UTF-32 +mode, one code unit equals one character, so all offsets are valid). 
Like the +pattern string, the subject may contain binary zeros. +.P +A non-zero starting offset is useful when searching for another match in the +same subject by calling \fBpcre2_match()\fP again after a previous success. +Setting \fIstartoffset\fP differs from passing over a shortened string and +setting PCRE2_NOTBOL in the case of a pattern that begins with any kind of +lookbehind. For example, consider the pattern +.sp + \eBiss\eB +.sp +which finds occurrences of "iss" in the middle of words. (\eB matches only if +the current position in the subject is not a word boundary.) When applied to +the string "Mississippi" the first call to \fBpcre2_match()\fP finds the first +occurrence. If \fBpcre2_match()\fP is called again with just the remainder of +the subject, namely "issippi", it does not match, because \eB is always false +at the start of the subject, which is deemed to be a word boundary. However, if +\fBpcre2_match()\fP is passed the entire string again, but with +\fIstartoffset\fP set to 4, it finds the second occurrence of "iss" because it +is able to look behind the starting point to discover that it is preceded by a +letter. +.P +Finding all the matches in a subject is tricky when the pattern can match an +empty string. It is possible to emulate Perl's /g behaviour by first trying the +match again at the same offset, with the PCRE2_NOTEMPTY_ATSTART and +PCRE2_ANCHORED options, and then if that fails, advancing the starting offset +and trying an ordinary match again. There is some code that demonstrates how to +do this in the +.\" HREF +\fBpcre2demo\fP +.\" +sample program. In the most general case, you have to check to see if the +newline convention recognizes CRLF as a newline, and if so, and the current +character is CR followed by LF, advance the starting offset by two characters +instead of one. +.P +If a non-zero starting offset is passed when the pattern is anchored, a single +attempt to match at the given offset is made. 
This can only succeed if the +pattern does not require the match to be at the start of the subject. In other +words, the anchoring must be the result of setting the PCRE2_ANCHORED option or +the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA. +. +. +.\" HTML +.SS "Option bits for \fBpcre2_match()\fP" +.rs +.sp +The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be +zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_ENDANCHORED, +PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, +PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. +Their action is described below. +.P +Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by +the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the +interpretive code in \fBpcre2_match()\fP is run. +PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from PCRE2_NO_JIT +(obviously), the remaining options are supported for JIT matching. +.sp + PCRE2_ANCHORED +.sp +The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first +matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out +to be anchored by virtue of its contents, it cannot be made unanchored at +matching time. Note that setting the option at match time disables JIT +matching. +.sp + PCRE2_COPY_MATCHED_SUBJECT +.sp +By default, a pointer to the subject is remembered in the match data block so +that, after a successful match, it can be referenced by the substring +extraction functions. This means that the subject's memory must not be freed +until all such operations are complete. For some applications where the +lifetime of the subject string is not guaranteed, it may be necessary to make a +copy of the subject string, but it is wasteful to do this unless the match is +successful.
After a successful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the +subject is copied and the new pointer is remembered in the match data block +instead of the original subject pointer. The memory allocator that was used for +the match block itself is used. The copy is automatically freed when +\fBpcre2_match_data_free()\fP is called to free the match data block. It is also +automatically freed if the match data block is re-used for another match +operation. +.sp + PCRE2_DISABLE_RECURSELOOP_CHECK +.sp +This option is relevant only to \fBpcre2_match()\fP for interpretive matching. +It is ignored when JIT is used, and is forbidden for \fBpcre2_dfa_match()\fP. +.P +The use of recursion in patterns can lead to infinite loops. In the +interpretive matcher these would be eventually caught by the match or heap +limits, but this could take a long time and/or use a lot of memory if the +limits are large. There is therefore a check at the start of each recursion. +If the same group is still active from a previous call, and the current subject +pointer is the same as it was at the start of that group, and the furthest +inspected character of the subject has not changed, an error is generated. +.P +There are rare cases of matches that would complete, but nevertheless trigger +this error. This option disables the check. It is provided mainly for testing +when comparing JIT and interpretive behaviour. +.sp + PCRE2_ENDANCHORED +.sp +If the PCRE2_ENDANCHORED option is set, any string that \fBpcre2_match()\fP +matches must be right at the end of the subject string. Note that setting the +option at match time disables JIT matching. +.sp + PCRE2_NOTBOL +.sp +This option specifies that the first character of the subject string is not the +beginning of a line, so the circumflex metacharacter should not match before +it. Setting this without having set PCRE2_MULTILINE at compile time causes +circumflex never to match. This option affects only the behaviour of the +circumflex metacharacter.
It does not affect \eA. +.sp + PCRE2_NOTEOL +.sp +This option specifies that the end of the subject string is not the end of a +line, so the dollar metacharacter should not match it nor (except in multiline +mode) a newline immediately before it. Setting this without having set +PCRE2_MULTILINE at compile time causes dollar never to match. This option +affects only the behaviour of the dollar metacharacter. It does not affect \eZ +or \ez. +.sp + PCRE2_NOTEMPTY +.sp +An empty string is not considered to be a valid match if this option is set. If +there are alternatives in the pattern, they are tried. If all the alternatives +match the empty string, the entire match fails. For example, if the pattern +.sp + a?b? +.sp +is applied to a string not beginning with "a" or "b", it matches an empty +string at the start of the subject. With PCRE2_NOTEMPTY set, this match is not +valid, so \fBpcre2_match()\fP searches further into the string for occurrences +of "a" or "b". +.sp + PCRE2_NOTEMPTY_ATSTART +.sp +This is like PCRE2_NOTEMPTY, except that it locks out an empty string match +only at the first matching position, that is, at the start of the subject plus +the starting offset. An empty string match later in the subject is permitted. +If the pattern is anchored, such a match can occur only if the pattern contains +\eK. +.sp + PCRE2_NO_JIT +.sp +By default, if a pattern has been successfully processed by +\fBpcre2_jit_compile()\fP, JIT is automatically used when \fBpcre2_match()\fP +is called with options that JIT supports. Setting PCRE2_NO_JIT disables the use +of JIT; it forces matching to be done by the interpreter. +.sp + PCRE2_NO_UTF_CHECK +.sp +When PCRE2_UTF is set at compile time, the validity of the subject as a UTF +string is checked unless PCRE2_NO_UTF_CHECK is passed to \fBpcre2_match()\fP or +PCRE2_MATCH_INVALID_UTF was passed to \fBpcre2_compile()\fP. The latter special +case is discussed in detail in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. 
+.P +In the default case, if a non-zero starting offset is given, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \eb and \eB are +one-character lookbehinds. +.P +The check is carried out before any other processing takes place, and a +negative error code is returned if the check fails. There are several UTF error +codes for each code unit width, corresponding to different problems with the +code unit sequence. There are discussions about the validity of +.\" HTML +.\" +UTF-8 strings, +.\" +.\" HTML +.\" +UTF-16 strings, +.\" +and +.\" HTML +.\" +UTF-32 strings +.\" +in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.P +If you know that your subject is valid, and you want to skip this check for +performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling +\fBpcre2_match()\fP. You might want to do this for the second and subsequent +calls to \fBpcre2_match()\fP if you are making repeated calls to find multiple +matches in the same subject string. +.P +\fBWarning:\fP Unless PCRE2_MATCH_INVALID_UTF was set at compile time, when +PCRE2_NO_UTF_CHECK is set at match time the effect of passing an invalid +string as a subject, or an invalid value of \fIstartoffset\fP, is undefined. +Your program may crash or loop indefinitely or give wrong results. +.sp + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT +.sp +These options turn on the partial matching feature. 
A partial match occurs if +the end of the subject string is reached successfully, but there are not enough +subject characters to complete the match. In addition, either at least one +character must have been inspected or the pattern must contain a lookbehind, or +the pattern must be one that could match an empty string. +.P +If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD) +is set, matching continues by testing any remaining alternatives. Only if no +complete match can be found is PCRE2_ERROR_PARTIAL returned instead of +PCRE2_ERROR_NOMATCH. In other words, PCRE2_PARTIAL_SOFT specifies that the +caller is prepared to handle a partial match, but only if no complete match can +be found. +.P +If PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this case, if +a partial match is found, \fBpcre2_match()\fP immediately returns +PCRE2_ERROR_PARTIAL, without considering any other alternatives. In other +words, when PCRE2_PARTIAL_HARD is set, a partial match is considered to be more +important than an alternative complete match. +.P +There is a more detailed discussion of partial and multi-segment matching, with +examples, in the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +. +. +. +.SH "NEWLINE HANDLING WHEN MATCHING" +.rs +.sp +When PCRE2 is built, a default newline convention is set; this is usually the +standard convention for the operating system. The default can be overridden in +a +.\" HTML +.\" +compile context +.\" +by calling \fBpcre2_set_newline()\fP. It can also be overridden by starting a +pattern string with, for example, (*CRLF), as described in the +.\" HTML +.\" +section on newline conventions +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. During matching, the newline choice affects the behaviour of the dot, +circumflex, and dollar metacharacters. It may also alter the way the match +starting position is advanced after a match failure for an unanchored pattern.
+.P +When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is set as +the newline convention, and a match attempt for an unanchored pattern fails +when the current starting position is at a CRLF sequence, and the pattern +contains no explicit matches for CR or LF characters, the match position is +advanced by two characters instead of one, in other words, to after the CRLF. +.P +The above rule is a compromise that makes the most common cases work as +expected. For example, if the pattern is .+A (and the PCRE2_DOTALL option is +not set), it does not match the string "\er\enA" because, after failing at the +start, it skips both the CR and the LF before retrying. However, the pattern +[\er\en]A does match that string, because it contains an explicit CR or LF +reference, and so advances only by one character after the first failure. +.P +An explicit match for CR or LF is either a literal appearance of one of those +characters in the pattern, or one of the \er or \en or equivalent octal or +hexadecimal escape sequences. Implicit matches such as [^X] do not count, nor +does \es, even though it includes CR and LF in the characters that it matches. +.P +Notwithstanding the above, anomalous effects may still occur when CRLF is a +valid newline sequence and explicit \er or \en escapes appear in the pattern. +. +. +.\" HTML +.SH "HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS" +.rs +.sp +.nf +.B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP); +.fi +.P +In general, a pattern matches a certain portion of the subject, and in +addition, further substrings from the subject may be picked out by +parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's +book, this is called "capturing" in what follows, and the phrase "capture +group" (Perl terminology) is used for a fragment of a pattern that picks out a +substring.
PCRE2 supports several other kinds of parenthesized group that do +not cause substrings to be captured. The \fBpcre2_pattern_info()\fP function +can be used to find out how many capture groups there are in a compiled +pattern. +.P +You can use auxiliary functions for accessing captured substrings +.\" HTML +.\" +by number +.\" +or +.\" HTML +.\" +by name, +.\" +as described in sections below. +.P +Alternatively, you can make direct use of the vector of PCRE2_SIZE values, +called the \fBovector\fP, which contains the offsets of captured strings. It is +part of the +.\" HTML +.\" +match data block. +.\" +The function \fBpcre2_get_ovector_pointer()\fP returns the address of the +ovector, and \fBpcre2_get_ovector_count()\fP returns the number of pairs of +values it contains. +.P +Within the ovector, the first in each pair of values is set to the offset of +the first code unit of a substring, and the second is set to the offset of the +first code unit after the end of a substring. These values are always code unit +offsets, not character offsets. That is, they are byte offsets in the 8-bit +library, 16-bit offsets in the 16-bit library, and 32-bit offsets in the 32-bit +library. +.P +After a partial match (error return PCRE2_ERROR_PARTIAL), only the first pair +of offsets (that is, \fIovector[0]\fP and \fIovector[1]\fP) are set. They +identify the part of the subject that was partially matched. See the +.\" HREF +\fBpcre2partial\fP +.\" +documentation for details of partial matching. +.P +After a fully successful match, the first pair of offsets identifies the +portion of the subject string that was matched by the entire pattern. The next +pair is used for the first captured substring, and so on. The value returned by +\fBpcre2_match()\fP is one more than the highest numbered pair that has been +set. For example, if two substrings have been captured, the returned value is +3. 
If there are no captured substrings, the return value from a successful +match is 1, indicating that just the first pair of offsets has been set. +.P +If a pattern uses the \eK escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\eK) is matched against "ab", the start and +end offset values for the match are 2 and 0. +.P +If a capture group is matched repeatedly within a single match operation, it is +the last portion of the subject that it matched that is returned. +.P +If the ovector is too small to hold all the captured substring offsets, as much +as possible is filled in, and the function returns a value of zero. If captured +substrings are not of interest, \fBpcre2_match()\fP may be called with a match +data block whose ovector is of minimum length (that is, one pair). +.P +It is possible for capture group number \fIn+1\fP to match some part of the +subject when group \fIn\fP has not been used at all. For example, if the string +"abc" is matched against the pattern (a|(z))(bc) the return from the function +is 4, and groups 1 and 3 are matched, but 2 is not. When this happens, both +values in the offset pairs corresponding to unused groups are set to +PCRE2_UNSET. +.P +Offset values that correspond to unused groups at the end of the expression are +also set to PCRE2_UNSET. For example, if the string "abc" is matched against +the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the +function is 2, because the highest used capture group number is 1. The offsets +for the second and third capture groups (assuming the vector is large enough, +of course) are set to PCRE2_UNSET. +.P +Elements in the ovector that do not correspond to capturing parentheses in the +pattern are never changed. That is, if a pattern contains \fIn\fP capturing +parentheses, no more than \fIovector[0]\fP to \fIovector[2n+1]\fP are set by +\fBpcre2_match()\fP. 
The other elements retain whatever values they previously +had. After a failed match attempt, the contents of the ovector are unchanged. +. +. +.\" HTML +.SH "OTHER INFORMATION ABOUT A MATCH" +.rs +.sp +.nf +.B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP); +.sp +.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); +.fi +.P +As well as the offsets in the ovector, other information about a match is +retained in the match data block and can be retrieved by the above functions in +appropriate circumstances. If they are called at other times, the result is +undefined. +.P +After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure +to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function +\fBpcre2_get_mark()\fP can be called to access this name, which can be +specified in the pattern by any of the backtracking control verbs, not just +(*MARK). The same function applies to all the verbs. It returns a pointer to +the zero-terminated name, which is within the compiled pattern. If no name is +available, NULL is returned. The length of the name (excluding the terminating +zero) is stored in the code unit that precedes the name. You should use this +length instead of relying on the terminating zero if the name might contain a +binary zero. +.P +After a successful match, the name that is returned is the last mark name +encountered on the matching path through the pattern. Instances of backtracking +verbs without names do not count. Thus, for example, if the matching path +contains (*MARK:A)(*PRUNE), the name "A" is returned. After a "no match" or a +partial match, the last encountered name is returned. For example, consider +this pattern: +.sp + ^(*MARK:A)((*MARK:B)a|b)c +.sp +When it matches "bc", the returned name is A. The B mark is "seen" in the first +branch of the group, but it is not on the matching path. On the other hand, +when this pattern fails to match "bx", the returned name is B. 
+.P +\fBWarning:\fP By default, certain start-of-match optimizations are used to +give a fast "no match" result in some situations. For example, if the anchoring +is removed from the pattern above, there is an initial check for the presence +of "c" in the subject before running the matching engine. This check fails for +"bx", causing a match failure without seeing any marks. You can disable the +start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for +\fBpcre2_compile()\fP or by starting the pattern with (*NO_START_OPT). +.P +After a successful match, a partial match, or one of the invalid UTF errors +(for example, PCRE2_ERROR_UTF8_ERR5), \fBpcre2_get_startchar()\fP can be +called. After a successful or partial match it returns the code unit offset of +the character at which the match started. For a non-partial match, this can be +different to the value of \fIovector[0]\fP if the pattern contains the \eK +escape sequence. After a partial match, however, this value is always the same +as \fIovector[0]\fP because \eK does not affect the result of a partial match. +.P +After a UTF check failure, \fBpcre2_get_startchar()\fP can be used to obtain +the code unit offset of the invalid UTF character. Details are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. +. +. +.\" HTML +.SH "ERROR RETURNS FROM \fBpcre2_match()\fP" +.rs +.sp +If \fBpcre2_match()\fP fails, it returns a negative number. This can be +converted to a text string by calling the \fBpcre2_get_error_message()\fP +function (see "Obtaining a textual error message" +.\" HTML +.\" +below). +.\" +Negative error codes are also returned by other functions, and are documented +with them. The codes are given names in the header file. If UTF checking is in +force and an invalid UTF subject string is detected, one of a number of +UTF-specific negative error codes is returned. Details are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. 
The following are the other errors that may be returned by +\fBpcre2_match()\fP: +.sp + PCRE2_ERROR_NOMATCH +.sp +The subject string did not match the pattern. +.sp + PCRE2_ERROR_PARTIAL +.sp +The subject string did not match, but it did match partially. See the +.\" HREF +\fBpcre2partial\fP +.\" +documentation for details of partial matching. +.sp + PCRE2_ERROR_BADMAGIC +.sp +PCRE2 stores a 4-byte "magic number" at the start of the compiled code, to +catch the case when it is passed a junk pointer. This is the error that is +returned when the magic number is not present. +.sp + PCRE2_ERROR_BADMODE +.sp +This error is given when a compiled pattern is passed to a function in a +library of a different code unit width, for example, a pattern compiled by +the 8-bit library is passed to a 16-bit or 32-bit library function. +.sp + PCRE2_ERROR_BADOFFSET +.sp +The value of \fIstartoffset\fP was greater than the length of the subject. +.sp + PCRE2_ERROR_BADOPTION +.sp +An unrecognized bit was set in the \fIoptions\fP argument. +.sp + PCRE2_ERROR_BADUTFOFFSET +.sp +The UTF code unit sequence that was passed as a subject was checked and found +to be valid (the PCRE2_NO_UTF_CHECK option was not set), but the value of +\fIstartoffset\fP did not point to the beginning of a UTF character or the end +of the subject. +.sp + PCRE2_ERROR_CALLOUT +.sp +This error is never generated by \fBpcre2_match()\fP itself. It is provided for +use by callout functions that want to cause \fBpcre2_match()\fP or +\fBpcre2_callout_enumerate()\fP to return a distinctive error code. See the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details. +.sp + PCRE2_ERROR_DEPTHLIMIT +.sp +The nested backtracking depth limit was reached. +.sp + PCRE2_ERROR_HEAPLIMIT +.sp +The heap limit was reached. +.sp + PCRE2_ERROR_INTERNAL +.sp +An unexpected internal error has occurred. This error could be caused by a bug +in PCRE2 or by overwriting of the compiled pattern. 
+.sp + PCRE2_ERROR_JIT_STACKLIMIT +.sp +This error is returned when a pattern that was successfully studied using JIT +is being matched, but the memory available for the just-in-time processing +stack is not large enough. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for more details. +.sp + PCRE2_ERROR_MATCHLIMIT +.sp +The backtracking match limit was reached. +.sp + PCRE2_ERROR_NOMEMORY +.sp +Heap memory is used to remember backtracking points. This error is given when +the memory allocation function (default or custom) fails. Note that a different +error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds +the heap limit. PCRE2_ERROR_NOMEMORY is also returned if +PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. +.sp + PCRE2_ERROR_NULL +.sp +Either the \fIcode\fP, \fIsubject\fP, or \fImatch_data\fP argument was passed +as NULL. +.sp + PCRE2_ERROR_RECURSELOOP +.sp +This error is returned when \fBpcre2_match()\fP detects a recursion loop within +the pattern. Specifically, it means that either the whole pattern or a +capture group has been called recursively for the second time at the same +position in the subject string. Some simple patterns that might do this are +detected and faulted at compile time, but more complicated cases, in particular +mutual recursions between two different groups, cannot be detected until +matching is attempted. +. +. +.\" HTML +.SH "OBTAINING A TEXTUAL ERROR MESSAGE" +.rs +.sp +.nf +.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP, +.B " PCRE2_SIZE \fIbufflen\fP);" +.fi +.P +A text message for an error code from any PCRE2 function (compile, match, or +auxiliary) can be obtained by calling \fBpcre2_get_error_message()\fP. The code +is passed as the first argument, with the remaining two arguments specifying a +code unit buffer and its length in code units, into which the text message is +placed. 
The message is returned in code units of the appropriate width for the +library that is being used. +.P +The returned message is terminated with a trailing zero, and the function +returns the number of code units used, excluding the trailing zero. If the +error number is unknown, the negative error code PCRE2_ERROR_BADDATA is +returned. If the buffer is too small, the message is truncated (but still with +a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned. +None of the messages are very long; a buffer size of 120 code units is ample. +. +. +.\" HTML +.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER" +.rs +.sp +.nf +.B int pcre2_substring_length_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_copy_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR *\fIbuffer\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_get_bynumber(pcre2_match_data *\fImatch_data\fP, +.B " uint32_t \fInumber\fP, PCRE2_UCHAR **\fIbufferptr\fP," +.B " PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +.fi +.P +Captured substrings can be accessed directly by using the ovector as described +.\" HTML +.\" +above. +.\" +For convenience, auxiliary functions are provided for extracting captured +substrings as new, separate, zero-terminated strings. A substring that contains +a binary zero is correctly extracted and has a further zero added on the end, +but the result is not, of course, a C string. +.P +The functions in this section identify substrings by number. The number zero +refers to the entire matched substring, with higher numbers referring to +substrings captured by parenthesized groups. After a partial match, only +substring zero is available. An attempt to extract any other substring gives +the error PCRE2_ERROR_PARTIAL. 
The next section describes similar functions for +extracting captured substrings by name. +.P +If a pattern uses the \eK escape sequence within a positive assertion, the +reported start of a successful match can be greater than the end of the match. +For example, if the pattern (?=ab\eK) is matched against "ab", the start and +end offset values for the match are 2 and 0. In this situation, calling these +functions with a zero substring number extracts a zero-length empty string. +.P +You can find the length in code units of a captured substring without +extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first +argument is a pointer to the match data block, the second is the group number, +and the third is a pointer to a variable into which the length is placed. If +you just want to know whether or not the substring has been captured, you can +pass the third argument as NULL. +.P +The \fBpcre2_substring_copy_bynumber()\fP function copies a captured substring +into a supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it +into new memory, obtained using the same memory allocation function that was +used for the match data block. The first two arguments of these functions are a +pointer to the match data block and a capture group number. +.P +The final arguments of \fBpcre2_substring_copy_bynumber()\fP are a pointer to +the buffer and a pointer to a variable that contains its length in code units. +This is updated to contain the actual number of code units used for the +extracted substring, excluding the terminating zero. +.P +For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point +to variables that are updated with a pointer to the new memory and the number +of code units that comprise the substring, again excluding the terminating +zero. When the substring is no longer needed, the memory should be freed by +calling \fBpcre2_substring_free()\fP. 
+.P +The return value from all these functions is zero for success, or a negative +error code. If the pattern match failed, the match failure code is returned. +If a substring number greater than zero is used after a partial match, +PCRE2_ERROR_PARTIAL is returned. Other possible error codes are: +.sp + PCRE2_ERROR_NOMEMORY +.sp +The buffer was too small for \fBpcre2_substring_copy_bynumber()\fP, or the +attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP. +.sp + PCRE2_ERROR_NOSUBSTRING +.sp +There is no substring with that number in the pattern, that is, the number is +greater than the number of capturing parentheses. +.sp + PCRE2_ERROR_UNAVAILABLE +.sp +The substring number, though not greater than the number of captures in the +pattern, is greater than the number of slots in the ovector, so the substring +could not be captured. +.sp + PCRE2_ERROR_UNSET +.sp +The substring did not participate in the match. For example, if the pattern is +(abc)|(def) and the subject is "def", and the ovector contains at least two +capturing slots, substring number 1 is unset. +. +. +.SH "EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS" +.rs +.sp +.nf +.B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP);" +.sp +.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP); +.fi +.P +The \fBpcre2_substring_list_get()\fP function extracts all available substrings +and builds a list of pointers to them. It also (optionally) builds a second +list that contains their lengths (in code units), excluding a terminating zero +that is added to each of them. All this is done in a single block of memory +that is obtained using the same memory allocation function that was used to get +the match data block. +.P +This function must be called only after a successful match. If called after a +partial match, the error code PCRE2_ERROR_PARTIAL is returned.
+.P +The address of the memory block is returned via \fIlistptr\fP, which is also +the start of the list of string pointers. The end of the list is marked by a +NULL pointer. The address of the list of lengths is returned via +\fIlengthsptr\fP. If your strings do not contain binary zeros and you do not +therefore need the lengths, you may supply NULL as the \fIlengthsptr\fP +argument to disable the creation of a list of lengths. The yield of the +function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block +could not be obtained. When the list is no longer needed, it should be freed by +calling \fBpcre2_substring_list_free()\fP. +.P +If this function encounters a substring that is unset, which can happen when +capture group number \fIn+1\fP matches some part of the subject, but group +\fIn\fP has not been used at all, it returns an empty string. This can be +distinguished from a genuine zero-length substring by inspecting the +appropriate offset in the ovector, which contains PCRE2_UNSET for unset +substrings, or by calling \fBpcre2_substring_length_bynumber()\fP. +. +. +.\" HTML +.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME" +.rs +.sp +.nf +.B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP);" +.sp +.B int pcre2_substring_length_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SIZE *\fIlength\fP);" +.sp +.B int pcre2_substring_copy_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR *\fIbuffer\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B int pcre2_substring_get_byname(pcre2_match_data *\fImatch_data\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_UCHAR **\fIbufferptr\fP, PCRE2_SIZE *\fIbufflen\fP);" +.sp +.B void pcre2_substring_free(PCRE2_UCHAR *\fIbuffer\fP); +.fi +.P +To extract a substring by name, you first have to find the associated number. +For example, for this pattern: +.sp + (a+)b(?<xxx>\ed+)... +.sp +the number of the capture group called "xxx" is 2.
If the name is known to be +unique (PCRE2_DUPNAMES was not set), you can find the number from the name by +calling \fBpcre2_substring_number_from_name()\fP. The first argument is the +compiled pattern, and the second is the name. The yield of the function is the +group number, PCRE2_ERROR_NOSUBSTRING if there is no group with that name, or +PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one group with that name. +Given the number, you can extract the substring directly from the ovector, or +use one of the "bynumber" functions described above. +.P +For convenience, there are also "byname" functions that correspond to the +"bynumber" functions, the only difference being that the second argument is a +name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate +names, these functions scan all the groups with the given name, and return the +captured substring from the first named group that is set. +.P +If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is +returned. If all groups with the name have numbers that are greater than the +number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there +is at least one group with a slot in the ovector, but no group is found to be +set, PCRE2_ERROR_UNSET is returned. +.P +\fBWarning:\fP If the pattern uses the (?| feature to set up multiple +capture groups with the same number, as described in the +.\" HTML +.\" +section on duplicate group numbers +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +page, you cannot use names to distinguish the different capture groups, because +names are not included in the compiled code. The matching process uses only +numbers. For this reason, the use of different names for groups with the +same number causes an error at compile time. +. +. 
+.\" HTML +.SH "CREATING A NEW STRING WITH SUBSTITUTIONS" +.rs +.sp +.nf +.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP," +.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP," +.B " PCRE2_SIZE *\fIoutlengthptr\fP);" +.fi +.P +This function optionally calls \fBpcre2_match()\fP and then makes a copy of the +subject string in \fIoutputbuffer\fP, replacing parts that were matched with +the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the +replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an +error occurs if \fIreplacement\fP is NULL. +.P +There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). +.P +If successful, \fBpcre2_substitute()\fP returns the number of substitutions +that were carried out. This may be zero if no match was found, and is never +greater than one unless PCRE2_SUBSTITUTE_GLOBAL is set. A negative value is +returned if an error is detected. +.P +Matches in which a \eK item in a lookahead in the pattern causes the match to +end before it starts are not supported, and give rise to an error return. For +global replacements, matches in which \eK in a lookbehind causes the match to +start earlier than the point that was reached in the previous iteration are +also not supported. 
+.P +The first seven arguments of \fBpcre2_substitute()\fP are the same as for +\fBpcre2_match()\fP, except that the partial matching options are not +permitted, and \fImatch_data\fP may be passed as NULL, in which case a match +data block is obtained and freed within this function, using memory management +functions from the match context, if provided, or else those that were used to +allocate memory for the compiled code. +.P +If \fImatch_data\fP is not NULL and PCRE2_SUBSTITUTE_MATCHED is not set, the +provided block is used for all calls to \fBpcre2_match()\fP, and its contents +afterwards are the result of the final call. For global changes, this will +always be a no-match error. The contents of the ovector within the match data +block may or may not have been changed. +.P +As well as the usual options for \fBpcre2_match()\fP, a number of additional +options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP. +One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external +\fImatch_data\fP block must be provided, and it must have already been used for +an external call to \fBpcre2_match()\fP with the same pattern and subject +arguments. The data in the \fImatch_data\fP block (return code, offset vector) +is then used for the first substitution instead of calling \fBpcre2_match()\fP +from within \fBpcre2_substitute()\fP. This allows an application to check for a +match before choosing to substitute, without having to repeat the match. +.P +The contents of the externally supplied match data block are not changed when +PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set, +\fBpcre2_match()\fP is called after the first substitution to check for further +matches, but this is done using an internally obtained match data block, thus +always leaving the external block unchanged. 
+.P +The \fIcode\fP argument is not used for matching before the first substitution +when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, even when +PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains information such as the +UTF setting and the number of capturing parentheses in the pattern. +.P +The default action of \fBpcre2_substitute()\fP is to return a copy of the +subject string with matched substrings replaced. However, if +PCRE2_SUBSTITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are +returned. In the global case, multiple replacements are concatenated in the +output buffer. Substitution callouts (see +.\" HTML +.\" +below) +.\" +can be used to separate them if necessary. +.P +The \fIoutlengthptr\fP argument of \fBpcre2_substitute()\fP must point to a +variable that contains the length, in code units, of the output buffer. If the +function is successful, the value is updated to contain the length in code +units of the new string, excluding the trailing zero that is automatically +added. +.P +If the function is not successful, the value set via \fIoutlengthptr\fP depends +on the type of error. For syntax errors in the replacement string, the value is +the offset in the replacement string where the error was detected. For other +errors, the value is PCRE2_UNSET by default. This includes the case of the +output buffer being too small, unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. +.P +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is +too small. The default action is to return PCRE2_ERROR_NOMEMORY immediately. If +this option is set, however, \fBpcre2_substitute()\fP continues to go through +the motions of matching and substituting (without, of course, writing anything) +in order to compute the size of buffer that is needed, which will include the +extra space for the terminating NUL. 
This value is passed back via the +\fIoutlengthptr\fP variable, with the result of the function still being +PCRE2_ERROR_NOMEMORY. +.P +Passing a buffer size of zero is a permitted way of finding out how much memory +is needed for a given substitution. However, this does mean that the entire +operation is carried out twice. Depending on the application, it may be more +efficient to allocate a large buffer and free the excess afterwards, instead of +using PCRE2_SUBSTITUTE_OVERFLOW_LENGTH. +.P +The replacement string, which is interpreted as a UTF string in UTF mode, is +checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF +replacement string causes an immediate return with the relevant UTF error code. +.P +If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted +in any way. By default, however, a dollar character is an escape character that +can specify the insertion of characters from capture groups and names from +(*MARK) or other control verbs in the pattern. Dollar is the only escape +character (backslash is treated as literal). The following forms are +recognized: +.sp + $$ insert a dollar character + $n or ${n} insert the contents of group \fIn\fP + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string + $*MARK or ${*MARK} insert a control verb name +.sp +Either a group number or a group name can be given for \fIn\fP, for example $2 or +$NAME. Curly brackets are required only if the following character would be +interpreted as part of the number or name. The number may be zero to include +the entire matched string. For example, if the pattern a(b)c is matched with +"=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=". +.P +The JavaScript form $<name>, where the angle brackets are part of the syntax, +is also recognized for group names, but not for group numbers or *MARK.
+.P +$*MARK inserts the name from the last encountered backtracking control verb on +the matching path that has a name. (*MARK) must always include a name, but the +other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name +inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B". This +facility can be used to perform simple simultaneous substitutions, as this +\fBpcre2test\fP example shows: +.sp + /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange +.sp +PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string, +replacing every matching substring. If this option is not set, only the first +matching substring is replaced. The search for matches takes place in the +original subject string (that is, previous replacements do not affect it). +Iteration is implemented by advancing the \fIstartoffset\fP value for each +search, which is always passed the entire subject string. If an offset limit is +set in the match context, searching stops when that limit is reached. +.P +You can restrict the effect of a global substitution to a portion of the +subject string by setting either or both of \fIstartoffset\fP and an offset +limit. Here is a \fBpcre2test\fP example: +.sp + /B/g,replace=!,use_offset_limit + ABC ABC ABC ABC\e=offset=3,offset_limit=12 + 2: ABC A!C A!C ABC +.sp +When continuing with global substitutions after matching a substring with zero +length, an attempt to find a non-empty match at the same offset is performed. +If this is not successful, the offset is advanced by one character except when +CRLF is a valid newline sequence and the next two characters are CR, LF. In +this case, the offset is advanced by two characters. +.P +PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do +not appear in the pattern to be treated as unset groups. 
This option should be +used with care, because it means that a typo in a group name or number no +longer causes the PCRE2_ERROR_NOSUBSTRING error. +.P +PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including unknown +groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated as empty +strings when inserted as described above. If this option is not set, an attempt +to insert an unset group causes the PCRE2_ERROR_UNSET error. This option does +not influence the extended substitution syntax described below. +.P +PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the +replacement string. Without this option, only the dollar character is special, +and only the group insertion forms listed above are valid. When +PCRE2_SUBSTITUTE_EXTENDED is set, several things change: +.P +Firstly, backslash in a replacement string is interpreted as an escape +character. The usual forms such as \ex{ddd} can be used to specify particular +character codes, and backslash followed by any non-alphanumeric character +quotes that character. Extended quoting can be coded using \eQ...\eE, exactly +as in pattern strings. The escapes \eb and \ev are interpreted as the +characters backspace and vertical tab, respectively. +.P +The interpretation of backslash followed by one or more digits is the same as +in a pattern, which in Perl has some ambiguities. Details are given in the +.\" HREF +\fBpcre2pattern\fP +.\" +page. +.P +The Python form \eg<n>, where the angle brackets are part of the syntax and \fIn\fP +is either a group name or number, is recognized as an alternative way of +inserting the contents of a group, for example \eg<3>. +.P +There are also four escape sequences for forcing the case of inserted letters. +Case forcing applies to all inserted characters, including those from capture +groups and letters within \eQ...\eE quoted sequences. The insertion mechanism +has three states: no case forcing, force upper case, and force lower case.
The +escape sequences change the current state: \eU and \eL change to upper or lower +case forcing, respectively, and \eE (when not terminating a \eQ quoted +sequence) reverts to no case forcing. The sequences \eu and \el force the next +character (if it is a letter) to upper or lower case, respectively, and then +the state automatically reverts to no case forcing. +.P +However, if \eu is immediately followed by \eL or \el is immediately followed +by \eU, the next character's case is forced by the first escape sequence, and +subsequent characters by the second. This provides a "title casing" facility +that can be applied to group captures. For example, if group 1 has captured +"heLLo", the replacement string "\eu\eL$1" becomes "Hello". +.P +If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode +properties are used for case forcing characters whose code points are greater +than 127. However, only simple case folding, as determined by the Unicode file +\fBCaseFolding.txt\fP is supported. PCRE2 does not support language-specific +special casing rules such as using different lower case Greek sigmas in the +middle and ends of words (as defined in the Unicode file +\fBSpecialCasing.txt\fP). +.P +Note that case forcing sequences such as \eU...\eE do not nest. For example, +the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no +effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do +not apply to replacement strings. +.P +The final effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more +flexibility to capture group substitution. The syntax is similar to that used +by Bash: +.sp + ${n:-string} + ${n:+string1:string2} +.sp +As in the simple case, \fIn\fP may be a group number or a name. The first form +specifies a default value. If group \fIn\fP is set, its value is inserted; if +not, the string is expanded and the result inserted. 
The second form specifies +strings that are expanded and inserted when group \fIn\fP is set or unset, +respectively. The first form is just a convenient shorthand for +.sp + ${n:+${n}:string} +.sp +Backslash can be used to escape colons and closing curly brackets in the +replacement strings. A change of the case forcing state within a replacement +string remains in force afterwards, as shown in this \fBpcre2test\fP example: +.sp + /(some)?(body)/substitute_extended,replace=${1:+\eU:\eL}HeLLo + body + 1: hello + somebody + 1: HELLO +.sp +The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended +substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown +groups in the extended syntax forms to be treated as unset. +.P +If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, +PCRE2_SUBSTITUTE_UNSET_EMPTY, and PCRE2_SUBSTITUTE_EXTENDED are irrelevant and +are ignored. +. +. +.SS "Substitution errors" +.rs +.sp +In the event of an error, \fBpcre2_substitute()\fP returns a negative error +code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from +\fBpcre2_match()\fP are passed straight back. +.P +PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring insertion, +unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. +.P +PCRE2_ERROR_UNSET is returned for an unset substring insertion (including an +unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) when the simple +(non-extended) syntax is used and PCRE2_SUBSTITUTE_UNSET_EMPTY is not set. +.P +PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size of buffer that is +needed is returned via \fIoutlengthptr\fP. Note that this does not happen by +default. +.P +PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the +\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP +arguments are NULL. 
For backward compatibility reasons an exception is made for +the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0. +.P +PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the +replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE +(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket +not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group +substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before +it started or the match started earlier than the current position in the +subject, which can happen if \eK is used in an assertion). +.P +As for all PCRE2 errors, a text message that describes the error can be +obtained by calling the \fBpcre2_get_error_message()\fP function (see +"Obtaining a textual error message" +.\" HTML +.\" +above). +.\" +. +. +.\" HTML +.SS "Substitution callouts" +.rs +.sp +.nf +.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP, +.B " int (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +The \fBpcre2_set_substitute_callout()\fP function can be used to specify a +callout function for \fBpcre2_substitute()\fP. This information is passed in +a match context. The callout function is called after each substitution has +been processed, but it can cause the replacement not to happen. +.P +The callout function is not called for simulated substitutions that happen as a +result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. In this mode, when +substitution processing exceeds the buffer space provided by the caller, +processing continues by counting code units. The simulation is unable to +populate the callout block, and so the simulation is pessimistic about the +required buffer size. Whichever is larger of accepted or rejected substitution +is reported as the required size.
Therefore, the returned buffer length may be +an overestimate (without a substitution callout, it is normally an exact +measurement). +.P +The first argument of the callout function is a pointer to a substitute callout +block structure, which contains the following fields, not necessarily in this +order: +.sp + uint32_t \fIversion\fP; + uint32_t \fIsubscount\fP; + PCRE2_SPTR \fIinput\fP; + PCRE2_SPTR \fIoutput\fP; + PCRE2_SIZE \fI*ovector\fP; + uint32_t \fIoveccount\fP; + PCRE2_SIZE \fIoutput_offsets[2]\fP; +.sp +The \fIversion\fP field contains the version number of the block format. The +current version is 0. The version number will increase in future if more fields +are added, but the intention is never to remove any of the existing fields. +.P +The \fIsubscount\fP field is the number of the current match. It is 1 for the +first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP +pointers are copies of the values passed to \fBpcre2_substitute()\fP. +.P +The \fIovector\fP field points to the ovector, which contains the result of the +most recent match. The \fIoveccount\fP field contains the number of pairs that +are set in the ovector, and is always greater than zero. +.P +The \fIoutput_offsets\fP vector contains the offsets of the replacement in the +output string. This has already been processed for dollar and (if requested) +backslash substitutions as described above. +.P +The second argument of the callout function is the value passed as +\fIcallout_data\fP when the function was registered. The value returned by the +callout function is interpreted as follows: +.P +If the value is zero, the replacement is accepted, and, if +PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next +match. If the value is not zero, the current replacement is not accepted. If +the value is greater than zero, processing continues when +PCRE2_SUBSTITUTE_GLOBAL is set. 
Otherwise (the value is less than zero or +PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the +output and the call to \fBpcre2_substitute()\fP exits, returning the number of +matches so far. +. +. +.SS "Substitution case callouts" +.rs +.sp +.nf +.B int pcre2_set_substitute_case_callout(pcre2_match_context *\fImcontext\fP, +.B " PCRE2_SIZE (*\fIcallout_function\fP)(PCRE2_SPTR, PCRE2_SIZE," +.B " PCRE2_UCHAR *, PCRE2_SIZE," +.B " int, void *)," +.B " void *\fIcallout_data\fP);" +.fi +.sp +The \fBpcre2_set_substitute_case_callout()\fP function can be used to specify +a callout function for \fBpcre2_substitute()\fP to use when performing case +transformations. This does not affect any case insensitivity behaviour when +performing a match, but only the user-visible transformations performed when +processing a substitution such as: +.sp + pcre2_substitute(..., "\e\eU$1", ...) +.P +The default case transformations applied by PCRE2 are reasonably complete, and, +in UTF or UCP mode, perform the simple locale-invariant case transformations as +specified by Unicode. This is suitable for the internal (invisible) +case-equivalence procedures used during pattern matching, but an application +may wish to use more sophisticated locale-aware processing for the user-visible +substitution transformations. +.P +One example implementation of the \fIcallout_function\fP using the ICU +library would be: +.sp +.nf + PCRE2_SIZE + icu_case_callout( + PCRE2_SPTR input, PCRE2_SIZE input_len, + PCRE2_UCHAR *output, PCRE2_SIZE output_cap, + int to_case, void *data_ptr) + { + UErrorCode err = U_ZERO_ERROR; + int32_t r = to_case == PCRE2_SUBSTITUTE_CASE_LOWER + ?
u_strToUpper(output, output_cap, input, input_len, NULL, &err) + : u_strToTitle(output, output_cap, input, input_len, &first_char_only, + NULL, &err); + if (U_FAILURE(err)) return (~(PCRE2_SIZE)0); + return r; + } +.fi +.P +The first and second arguments of the case callout function are the Unicode +string to transform. +.P +The third and fourth arguments are the output buffer and its capacity. +.P +The fifth is one of the constants PCRE2_SUBSTITUTE_CASE_LOWER, +PCRE2_SUBSTITUTE_CASE_UPPER, or PCRE2_SUBSTITUTE_CASE_TITLE_FIRST. +PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the +callout to indicate that the case of the entire callout input should be +case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that +only the first character or glyph should be transformed to Unicode titlecase +and the rest to Unicode lowercase (note that titlecasing sometimes uses Unicode +properties to titlecase each word in a string; but PCRE2 is requesting that only +the single leading character is to be titlecased). +.P +The sixth argument is the \fIcallout_data\fP supplied to +\fBpcre2_set_substitute_case_callout()\fP. +.P +The resulting string in the destination buffer may be larger or smaller than the +input, if the casing rules merge or split characters. The return value is the +length required for the output string. If a buffer of sufficient size was +provided to the callout, then the result must be written to the buffer and the +number of code units returned. If the result does not fit in the provided +buffer, then the required capacity must be returned and PCRE2 will not make use +of the output buffer. PCRE2 provides input and output buffers which overlap, so +the callout must support this by suitable internal buffering. +.P +Alternatively, if the callout wishes to indicate an error, then it may return +(~(PCRE2_SIZE)0). In this case pcre2_substitute() will immediately fail with +error PCRE2_ERROR_REPLACECASE. 
+.P +When a case callout is combined with the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH +option, there are situations when pcre2_substitute() will return an +underestimate of the required buffer size. If you call pcre2_substitute() once +with PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, and the input buffer is too small for +the replacement string to be constructed, then instead of calling the case +callout, pcre2_substitute() will make an estimate of the required buffer size. +The second call should also pass PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, because that +second call is not guaranteed to succeed either, if the case callout requires +more buffer space than expected. The caller must make repeated attempts in a +loop. +. +. +.SH "DUPLICATE CAPTURE GROUP NAMES" +.rs +.sp +.nf +.B int pcre2_substring_nametable_scan(const pcre2_code *\fIcode\fP, +.B " PCRE2_SPTR \fIname\fP, PCRE2_SPTR *\fIfirst\fP, PCRE2_SPTR *\fIlast\fP);" +.fi +.P +When a pattern is compiled with the PCRE2_DUPNAMES option, names for capture +groups are not required to be unique. Duplicate names are always allowed for +groups with the same number, created by using the (?| feature. Indeed, if such +groups are named, they are required to use the same names. +.P +Normally, patterns that use duplicate names are such that in any one match, +only one of each set of identically-named groups participates. An example is +shown in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.P +When duplicates are present, \fBpcre2_substring_copy_byname()\fP and +\fBpcre2_substring_get_byname()\fP return the first substring corresponding to +the given name that is set. Only if none are set is PCRE2_ERROR_UNSET +returned. The \fBpcre2_substring_number_from_name()\fP function returns the +error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names. +.P +If you want to get full details of all captured substrings for a given name, +you must use the \fBpcre2_substring_nametable_scan()\fP function.
The first +argument is the compiled pattern, and the second is the name. If the third and +fourth arguments are NULL, the function returns a group number for a unique +name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. +.P +When the third and fourth arguments are not NULL, they must be pointers to +variables that are updated by the function. After it has run, they point to the +first and last entries in the name-to-number table for the given name, and the +function returns the length of each entry in code units. In both cases, +PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. +.P +The format of the name table is described +.\" HTML +.\" +above +.\" +in the section entitled \fIInformation about a pattern\fP. Given all the +relevant entries for the name, you can extract each of their numbers, and hence +the captured data. +. +. +.SH "FINDING ALL POSSIBLE MATCHES AT ONE POSITION" +.rs +.sp +The traditional matching function uses a similar algorithm to Perl, which stops +when it finds the first match at a given point in the subject. If you want to +find all possible matches, or the longest possible match at a given position, +consider using the alternative matching function (see below) instead. If you +cannot use the alternative function, you can kludge it up by making use of the +callout facility, which is described in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +.P +What you have to do is to insert a callout right at the end of the pattern. +When your callout function is called, extract and save the current matched +substring. Then return 1, which forces \fBpcre2_match()\fP to backtrack and try +other alternatives. Ultimately, when it runs out of matches, +\fBpcre2_match()\fP will yield PCRE2_ERROR_NOMATCH. +. +. 
+.\" HTML +.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION" +.rs +.sp +.nf +.B int pcre2_dfa_match(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP, +.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP," +.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP," +.B " pcre2_match_context *\fImcontext\fP," +.B " int *\fIworkspace\fP, PCRE2_SIZE \fIwscount\fP);" +.fi +.P +The function \fBpcre2_dfa_match()\fP is called to match a subject string +against a compiled pattern, using a matching algorithm that scans the subject +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does +not support, see the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. +.P +The arguments for the \fBpcre2_dfa_match()\fP function are the same as for +\fBpcre2_match()\fP, plus two extras. The ovector within the match data block +is used in a different way, and this is described below. The other common +arguments are used in the same way as for \fBpcre2_match()\fP, so their +description is not repeated here. +.P +The two additional arguments provide workspace for the function. The workspace +vector should contain at least 20 elements. It is used for keeping track of +multiple paths through the pattern tree. More workspace is needed for patterns +and subjects where there are a lot of potential matches. 
+.P +Here is an example of a simple call to \fBpcre2_dfa_match()\fP: +.sp + int wspace[20]; + pcre2_match_data *md = pcre2_match_data_create(4, NULL); + int rc = pcre2_dfa_match( + re, /* result of pcre2_compile() */ + "some string", /* the subject string */ + 11, /* the length of the subject string */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + md, /* the match data block */ + NULL, /* a match context; NULL means use defaults */ + wspace, /* working space vector */ + 20); /* number of elements (NOT size in bytes) */ +. +.SS "Option bits for \fBpcre2_dfa_match()\fP" +.rs +.sp +The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must +be zero. The only bits that may be set are PCRE2_ANCHORED, +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, +PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, +PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last +four of these are exactly the same as for \fBpcre2_match()\fP, so their +description is not repeated here. +.sp + PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT +.sp +These have the same general effect as they do for \fBpcre2_match()\fP, but the +details are slightly different. When PCRE2_PARTIAL_HARD is set for +\fBpcre2_dfa_match()\fP, it returns PCRE2_ERROR_PARTIAL if the end of the +subject is reached and there is still at least one matching possibility that +requires additional characters. This happens even if some complete matches have +already been found. When PCRE2_PARTIAL_SOFT is set, the return code +PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL if the end of the +subject is reached, there have been no complete matches, but there is still at +least one matching possibility. The portion of the string that was inspected +when the longest partial match was found is set as the first matching string in +both cases. 
There is a more detailed discussion of partial and multi-segment +matching, with examples, in the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +.sp + PCRE2_DFA_SHORTEST +.sp +Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to stop as +soon as it has found one match. Because of the way the alternative algorithm +works, this is necessarily the shortest possible match at the first possible +matching point in the subject string. +.sp + PCRE2_DFA_RESTART +.sp +When \fBpcre2_dfa_match()\fP returns a partial match, it is possible to call it +again, with additional subject characters, and have it continue with the same +match. The PCRE2_DFA_RESTART option requests this action; when it is set, the +\fIworkspace\fP and \fIwscount\fP arguments must reference the same vector as +before because data about the match so far is left in them after a partial +match. There is more discussion of this facility in the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +. +. +.SS "Successful returns from \fBpcre2_dfa_match()\fP" +.rs +.sp +When \fBpcre2_dfa_match()\fP succeeds, it may have matched more than one +substring in the subject. Note, however, that all the matches from one run of +the function start at the same point in the subject. The shorter matches are +all initial substrings of the longer matches. For example, if the pattern +.sp + <.*> +.sp +is matched against the string +.sp + This is <something> <something else> <something further> no more +.sp +the three matched strings are +.sp + <something> <something else> <something further> + <something> <something else> + <something> +.sp +On success, the yield of the function is a number greater than zero, which is +the number of matched substrings. The offsets of the substrings are returned in +the ovector, and can be extracted by number in the same way as for +\fBpcre2_match()\fP, but the numbers bear no relation to any capture groups +that may exist in the pattern, because DFA matching does not support capturing.
+.P +Calls to the convenience functions that extract substrings by name +return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a +DFA match. The convenience functions that extract substrings by number never +return PCRE2_ERROR_NOSUBSTRING. +.P +The matched strings are stored in the ovector in reverse order of length; that +is, the longest matching string is first. If there were too many matches to fit +into the ovector, the yield of the function is zero, and the vector is filled +with the longest matches. +.P +NOTE: PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\ed+" is compiled as if it were "a\ed++". For DFA matching, this +means that only one possible match is found. If you really do want multiple +matches in such cases, either use an ungreedy repeat such as "a\ed+?" or set +the PCRE2_NO_AUTO_POSSESS option when compiling. +. +. +.SS "Error returns from \fBpcre2_dfa_match()\fP" +.rs +.sp +The \fBpcre2_dfa_match()\fP function returns a negative number when it fails. +Many of the errors are the same as for \fBpcre2_match()\fP, as described +.\" HTML +.\" +above. +.\" +There are in addition the following errors that are specific to +\fBpcre2_dfa_match()\fP: +.sp + PCRE2_ERROR_DFA_UITEM +.sp +This return is given if \fBpcre2_dfa_match()\fP encounters an item in the +pattern that it does not support, for instance, the use of \eC in a UTF mode or +a backreference. +.sp + PCRE2_ERROR_DFA_UCOND +.sp +This return is given if \fBpcre2_dfa_match()\fP encounters a condition item +that uses a backreference for the condition, or a test for recursion in a +specific capture group. These are not supported. +.sp + PCRE2_ERROR_DFA_UINVALID_UTF +.sp +This return is given if \fBpcre2_dfa_match()\fP is called for a pattern that +was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for DFA +matching. 
+.sp + PCRE2_ERROR_DFA_WSSIZE +.sp +This return is given if \fBpcre2_dfa_match()\fP runs out of space in the +\fIworkspace\fP vector. +.sp + PCRE2_ERROR_DFA_RECURSE +.sp +When a recursion or subroutine call is processed, the matching function calls +itself recursively, using private memory for the ovector and \fIworkspace\fP. +This error is given if the internal ovector is not large enough. This should be +extremely rare, as a vector of size 1000 is used. +.sp + PCRE2_ERROR_DFA_BADRESTART +.sp +When \fBpcre2_dfa_match()\fP is called with the \fBPCRE2_DFA_RESTART\fP option, +some plausibility checks are made on the contents of the workspace, which +should contain data about the previous partial match. If any of these checks +fail, this error is given. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2build\fP(3), \fBpcre2callout\fP(3), \fBpcre2demo\fP(3), +\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3), +\fBpcre2sample\fP(3), \fBpcre2unicode\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 26 December 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2build.3 b/3rd/pcre2/doc/pcre2build.3 new file mode 100644 index 00000000..5b5fb402 --- /dev/null +++ b/3rd/pcre2/doc/pcre2build.3 @@ -0,0 +1,665 @@ +.TH PCRE2BUILD 3 "16 April 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +. +. +.SH "BUILDING PCRE2" +.rs +.sp +PCRE2 is distributed with a \fBconfigure\fP script that can be used to build +the library in Unix-like environments using the applications known as +Autotools. Also in the distribution are files to support building using +\fBCMake\fP instead of \fBconfigure\fP.
The text file +.\" HTML +.\" +\fBREADME\fP +.\" +contains general information about building with Autotools (some of which is +repeated below), and also has some comments about building on various operating +systems. The files in the \fBvms\fP directory support building under OpenVMS. +There is a lot more information about building PCRE2 without using +Autotools (including information about using \fBCMake\fP and building "by +hand") in the text file called +.\" HTML +.\" +\fBNON-AUTOTOOLS-BUILD\fP. +.\" +You should consult this file as well as the +.\" HTML +.\" +\fBREADME\fP +.\" +file if you are building in a non-Unix-like environment. +. +. +.SH "PCRE2 BUILD-TIME OPTIONS" +.rs +.sp +The rest of this document describes the optional features of PCRE2 that can be +selected when the library is compiled. It assumes use of the \fBconfigure\fP +script, where the optional features are selected or deselected by providing +options to \fBconfigure\fP before running the \fBmake\fP command. However, the +same options can be selected in both Unix-like and non-Unix-like environments +if you are using \fBCMake\fP instead of \fBconfigure\fP to build PCRE2. +.P +If you are not using Autotools or \fBCMake\fP, option selection can be done by +editing the \fBconfig.h\fP file, or by passing parameter settings to the +compiler, as described in +.\" HTML +.\" +\fBNON-AUTOTOOLS-BUILD\fP. +.\" +.P +The complete list of options for \fBconfigure\fP (which includes the standard +ones such as the selection of the installation directory) can be obtained by +running +.sp + ./configure --help +.sp +The following sections include descriptions of "on/off" options whose names +begin with --enable or --disable. Because of the way that \fBconfigure\fP +works, --enable and --disable always come in pairs, so the complementary option +always exists as well, but as it specifies the default, it is not described. +Options that specify values have names that start with --with. 
At the end of a +\fBconfigure\fP run, a summary of the configuration is output. +. +. +.SH "BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES" +.rs +.sp +By default, a library called \fBlibpcre2-8\fP is built, containing functions +that take string arguments contained in arrays of bytes, interpreted either as +single-byte characters, or UTF-8 strings. You can also build two other +libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process +strings that are contained in arrays of 16-bit and 32-bit code units, +respectively. These can be interpreted either as single-unit characters or +UTF-16/UTF-32 strings. To build these additional libraries, add one or both of +the following to the \fBconfigure\fP command: +.sp + --enable-pcre2-16 + --enable-pcre2-32 +.sp +If you do not want the 8-bit library, add +.sp + --disable-pcre2-8 +.sp +as well. At least one of the three libraries must be built. Note that the POSIX +wrapper is for the 8-bit library only, and that \fBpcre2grep\fP is an 8-bit +program. Neither of these are built if you select only the 16-bit or 32-bit +libraries. +. +. +.SH "BUILDING SHARED AND STATIC LIBRARIES" +.rs +.sp +The Autotools PCRE2 building process uses \fBlibtool\fP to build both shared +and static libraries by default. You can suppress an unwanted library by adding +one of +.sp + --disable-shared + --disable-static +.sp +to the \fBconfigure\fP command. Setting --disable-shared ensures that PCRE2 +libraries are built as static libraries. The binaries that are then created as +part of the build process (for example, \fBpcre2test\fP and \fBpcre2grep\fP) +are linked statically with one or more PCRE2 libraries, but may also be +dynamically linked with other libraries such as \fBlibc\fP. If you want these +binaries to be fully statically linked, you can set LDFLAGS like this: +.sp +LDFLAGS=--static ./configure --disable-shared +.sp +Note the two hyphens in --static. 
Of course, this works only if static versions +of all the relevant libraries are available for linking. +. +. +.SH "UNICODE AND UTF SUPPORT" +.rs +.sp +By default, PCRE2 is built with support for Unicode and UTF character strings. +To build it without Unicode support, add +.sp + --disable-unicode +.sp +to the \fBconfigure\fP command. This setting applies to all three libraries. It +is not possible to build one library with Unicode support and another without +in the same configuration. +.P +Of itself, Unicode support does not make PCRE2 treat strings as UTF-8, UTF-16 +or UTF-32. To do that, applications that use the library can set the PCRE2_UTF +option when they call \fBpcre2_compile()\fP to compile a pattern. +Alternatively, patterns may be started with (*UTF) unless the application has +locked this out by setting PCRE2_NEVER_UTF. +.P +UTF support allows the libraries to process character code points up to +0x10ffff in the strings that they handle. Unicode support also gives access to +the Unicode properties of characters, using pattern escapes such as \eP, \ep, +and \eX. Only the general category properties such as \fILu\fP and \fINd\fP, +script names, and some bi-directional properties are supported. Details are +given in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.P +Pattern escapes such as \ed and \ew do not by default make use of Unicode +properties. The application can request that they do by setting the PCRE2_UCP +option. Unless the application has set PCRE2_NEVER_UCP, a pattern may also +request this by starting with (*UCP). +. +. +.SH "DISABLING THE USE OF \eC" +.rs +.sp +The \eC escape sequence, which matches a single code unit, even in a UTF mode, +can cause unpredictable behaviour because it may leave the current matching +point in the middle of a multi-code-unit character. The application can lock it +out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +\fBpcre2_compile()\fP. 
There is also a build-time option +.sp + --enable-never-backslash-C +.sp +(note the upper case C) which locks out the use of \eC entirely. +. +. +.SH "JUST-IN-TIME COMPILER SUPPORT" +.rs +.sp +Just-in-time (JIT) compiler support is included in the build by specifying +.sp + --enable-jit +.sp +This support is available only for certain hardware architectures. If this +option is set for an unsupported architecture, a building error occurs. +If in doubt, use +.sp + --enable-jit=auto +.sp +which enables JIT only if the current hardware is supported. You can check +if JIT is enabled in the configuration summary that is output at the end of a +\fBconfigure\fP run. If you are enabling JIT under SELinux you may also want to +add +.sp + --enable-jit-sealloc +.sp +which enables the use of an execmem allocator in JIT that is compatible with +SELinux. This has no effect if JIT is not enabled. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for a discussion of JIT usage. When JIT support is enabled, +\fBpcre2grep\fP automatically makes use of it, unless you add +.sp + --disable-pcre2grep-jit +.sp +to the \fBconfigure\fP command. +. +. +.SH "NEWLINE RECOGNITION" +.rs +.sp +By default, PCRE2 interprets the linefeed (LF) character as indicating the end +of a line. This is the normal newline character on Unix-like systems. You can +compile PCRE2 to use carriage return (CR) instead, by adding +.sp + --enable-newline-is-cr +.sp +to the \fBconfigure\fP command. There is also an --enable-newline-is-lf option, +which explicitly specifies linefeed as the newline character. +.P +Alternatively, you can specify that line endings are to be indicated by the +two-character sequence CRLF (CR immediately followed by LF). If you want this, +add +.sp + --enable-newline-is-crlf +.sp +to the \fBconfigure\fP command. 
There is a fourth option, specified by +.sp + --enable-newline-is-anycrlf +.sp +which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as +indicating a line ending. A fifth option, specified by +.sp + --enable-newline-is-any +.sp +causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline +sequences are the three just mentioned, plus the single characters VT (vertical +tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line +separator, U+2028), and PS (paragraph separator, U+2029). The final option is +.sp + --enable-newline-is-nul +.sp +which causes NUL (binary zero) to be set as the default line-ending character. +.P +Whatever default line ending convention is selected when PCRE2 is built can be +overridden by applications that use the library. At build time it is +recommended to use the standard for your operating system. +. +. +.SH "WHAT \eR MATCHES" +.rs +.sp +By default, the sequence \eR in a pattern matches any Unicode newline sequence, +independently of what has been selected as the line ending sequence. If you +specify +.sp + --enable-bsr-anycrlf +.sp +the default is changed so that \eR matches only CR, LF, or CRLF. Whatever is +selected when PCRE2 is built can be overridden by applications that use the +library. +. +. +.SH "HANDLING VERY LARGE PATTERNS" +.rs +.sp +Within a compiled pattern, offset values are used to point from one part to +another (for example, from an opening parenthesis to an alternation +metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values +are used for these offsets, leading to a maximum size for a compiled pattern of +around 64 thousand code units. This is sufficient to handle all but the most +gigantic patterns. Nevertheless, some people do want to process truly enormous +patterns, so it is possible to compile PCRE2 to use three-byte or four-byte +offsets by adding a setting such as +.sp + --with-link-size=3 +.sp +to the \fBconfigure\fP command. 
The value given must be 2, 3, or 4. For the +16-bit library, a value of 3 is rounded up to 4. In these libraries, using +longer offsets slows down the operation of PCRE2 because it has to load +additional data when handling them. For the 32-bit library the value is always +4 and cannot be overridden; the value of --with-link-size is ignored. +. +. +.SH "LIMITING PCRE2 RESOURCE USAGE" +.rs +.sp +The \fBpcre2_match()\fP function increments a counter each time it goes round +its main loop. Putting a limit on this counter controls the amount of computing +resource used by a single call to \fBpcre2_match()\fP. The limit can be changed +at run time, as described in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. The default is 10 million, but this can be changed by adding a +setting such as +.sp + --with-match-limit=500000 +.sp +to the \fBconfigure\fP command. This setting also applies to the +\fBpcre2_dfa_match()\fP matching function, and to JIT matching (though the +counting is done differently). +.P +The \fBpcre2_match()\fP function uses heap memory to record backtracking +points. The more nested backtracking points there are (that is, the deeper the +search tree), the more memory is needed. There is an upper limit, specified in +kibibytes (units of 1024 bytes). This limit can be changed at run time, as +described in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. The default limit (in effect unlimited) is 20 million. You can +change this by a setting such as +.sp + --with-heap-limit=500 +.sp +which limits the amount of heap to 500 KiB. This limit applies only to +interpretive matching in \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, which +may also use the heap for internal workspace when processing complicated +patterns. This limit does not apply when JIT (which has its own memory +arrangements) is used. +.P +You can also explicitly limit the depth of nested backtracking in the +\fBpcre2_match()\fP interpreter. 
This limit defaults to the value that is set +for --with-match-limit. You can set a lower default limit by adding, for +example, +.sp + --with-match-limit-depth=10000 +.sp +to the \fBconfigure\fP command. This value can be overridden at run time. This +depth limit indirectly limits the amount of heap memory that is used, but +because the size of each backtracking "frame" depends on the number of +capturing parentheses in a pattern, the amount of heap that is used before the +limit is reached varies from pattern to pattern. This limit was more useful in +versions before 10.30, where function recursion was used for backtracking. +.P +As well as applying to \fBpcre2_match()\fP, the depth limit also controls +the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are +used for lookaround assertions, atomic groups, and recursion within patterns. +The limit does not apply to JIT matching. +. +. +.SH "LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS" +.rs +.sp +Lookbehind assertions in which one or more branches can match a variable number +of characters are supported only if there is a maximum matching length for each +top-level branch. There is a limit to this maximum that defaults to 255 +characters. You can alter this default by a setting such as +.sp + --with-max-varlookbehind=100 +.sp +The limit can be changed at runtime by calling +\fBpcre2_set_max_varlookbehind()\fP. Lookbehind assertions in which every +branch matches a fixed number of characters (not necessarily all the same) are +not constrained by this limit. +. +. +.\" HTML +.SH "CREATING CHARACTER TABLES AT BUILD TIME" +.rs +.sp +PCRE2 uses fixed tables for processing characters whose code points are less +than 256. By default, PCRE2 is built with a set of tables that are distributed +in the file \fIsrc/pcre2_chartables.c.dist\fP. These tables are for ASCII codes +only. 
If you add +.sp + --enable-rebuild-chartables +.sp +to the \fBconfigure\fP command, the distributed tables are no longer used. +Instead, a program called \fBpcre2_dftables\fP is compiled and run. This +outputs the source for a new set of tables, created in the default locale of your +C run-time system. This method of replacing the tables does not work if you are +cross compiling, because \fBpcre2_dftables\fP needs to be run on the local +host and therefore not compiled with the cross compiler. +.P +If you need to create alternative tables when cross compiling, you will have to +do so "by hand". There may also be other reasons for creating tables manually. +To cause \fBpcre2_dftables\fP to be built on the local host, run a normal +compiling command, and then run the program with the output file as its +argument, for example: +.sp + cc src/pcre2_dftables.c -o pcre2_dftables + ./pcre2_dftables src/pcre2_chartables.c +.sp +This builds the tables in the default locale of the local host. If you want to +specify a locale, you must use the -L option: +.sp + LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c +.sp +You can also specify -b (with or without -L). This causes the tables to be +written in binary instead of as source code. A set of binary tables can be +loaded into memory by an application and passed to \fBpcre2_compile()\fP in the +same way as tables created by calling \fBpcre2_maketables()\fP. The tables are +just a string of bytes, independent of hardware characteristics such as +endianness. This means they can be bundled with an application that runs in +different environments, to ensure consistent behaviour. +. +. +.SH "USING EBCDIC CODE" +.rs +.sp +PCRE2 assumes by default that it will run in an environment where the character +code is ASCII or Unicode, which is a superset of ASCII. This is the case for +most computer operating systems.
PCRE2 can, however, be compiled to run in an +8-bit EBCDIC environment by adding +.sp + --enable-ebcdic --disable-unicode +.sp +to the \fBconfigure\fP command. This setting implies +--enable-rebuild-chartables. You should only use it if you know that you are in +an EBCDIC environment (for example, an IBM mainframe operating system). +.P +It is not possible to support both EBCDIC and UTF-8 codes in the same version +of the library. Consequently, --enable-unicode and --enable-ebcdic are mutually +exclusive. +.P +The EBCDIC character that corresponds to an ASCII LF is assumed to have the +value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In +such an environment you should use +.sp + --enable-ebcdic-nl25 +.sp +as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the +same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is \fInot\fP +chosen as LF is made to correspond to the Unicode NEL character (which, in +Unicode, is 0x85). +.P +The options that select newline behaviour, such as --enable-newline-is-cr, +and equivalent run-time options, refer to these character values in an EBCDIC +environment. +. +. +.SH "PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS" +.rs +.sp +By default \fBpcre2grep\fP supports the use of callouts with string arguments +within the patterns it is matching. There are two kinds: one that generates +output using local code, and another that calls an external program or script. +If --disable-pcre2grep-callout-fork is added to the \fBconfigure\fP command, +only the first kind of callout is supported; if --disable-pcre2grep-callout is +used, all callouts are completely ignored. For more details of \fBpcre2grep\fP +callouts, see the +.\" HREF +\fBpcre2grep\fP +.\" +documentation. +. +. +.SH "PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT" +.rs +.sp +By default, \fBpcre2grep\fP reads all files as plain text. 
You can build it so +that it recognizes files whose names end in \fB.gz\fP or \fB.bz2\fP, and reads +them with \fBlibz\fP or \fBlibbz2\fP, respectively, by adding one or both of +.sp + --enable-pcre2grep-libz + --enable-pcre2grep-libbz2 +.sp +to the \fBconfigure\fP command. These options naturally require that the +relevant libraries are installed on your system. Configuration will fail if +they are not. +. +. +.SH "PCRE2GREP BUFFER SIZE" +.rs +.sp +\fBpcre2grep\fP uses an internal buffer to hold a "window" on the file it is +scanning, in order to be able to output "before" and "after" lines when it +finds a match. The default starting size of the buffer is 20KiB. The buffer +itself is three times this size, but because of the way it is used for holding +"before" lines, the longest line that is guaranteed to be processable is the +notional buffer size. If a longer line is encountered, \fBpcre2grep\fP +automatically expands the buffer, up to a specified maximum size, whose default +is 1MiB or the starting size, whichever is the larger. You can change the +default parameter values by adding, for example, +.sp + --with-pcre2grep-bufsize=51200 + --with-pcre2grep-max-bufsize=2097152 +.sp +to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override +these values by using --buffer-size and --max-buffer-size on the command line. +. +. +.SH "PCRE2TEST OPTION FOR LIBREADLINE SUPPORT" +.rs +.sp +If you add one of +.sp + --enable-pcre2test-libreadline + --enable-pcre2test-libedit +.sp +to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the +\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is +from a terminal, it reads it using the \fBreadline()\fP function. This provides +line-editing and history facilities. Note that \fBlibreadline\fP is +GPL-licensed, so if you distribute a binary of \fBpcre2test\fP linked in this +way, there may be licensing issues.
These can be avoided by linking instead +with \fBlibedit\fP, which has a BSD licence. +.P +Setting --enable-pcre2test-libreadline causes the \fB-lreadline\fP option to be +added to the \fBpcre2test\fP build. In many operating environments with a +system-installed readline library this is sufficient. However, in some +environments (e.g. if an unmodified distribution version of readline is in +use), some extra configuration may be necessary. The INSTALL file for +\fBlibreadline\fP says this: +.sp + "Readline uses the termcap functions, but does not link with + the termcap or curses library itself, allowing applications + which link with readline the to choose an appropriate library." +.sp +If your environment has not been set up so that an appropriate library is +automatically included, you may need to add something like +.sp + LIBS="-lncurses" +.sp +immediately before the \fBconfigure\fP command. +. +. +.SH "INCLUDING DEBUGGING CODE" +.rs +.sp +If you add +.sp + --enable-debug +.sp +to the \fBconfigure\fP command, additional debugging code is included in the +build. This feature is intended for use by the PCRE2 maintainers. +. +. +.SH "DEBUGGING WITH VALGRIND SUPPORT" +.rs +.sp +If you add +.sp + --enable-valgrind +.sp +to the \fBconfigure\fP command, PCRE2 will use valgrind annotations to mark +certain memory regions as unaddressable. This allows it to detect invalid +memory accesses, and is mostly useful for debugging PCRE2 itself. +. +. +.SH "CODE COVERAGE REPORTING" +.rs +.sp +If your C compiler is gcc, you can build a version of PCRE2 that can generate a +code coverage report for its test suite. To enable this, you must install +\fBlcov\fP version 1.6 or above. Then specify +.sp + --enable-coverage +.sp +to the \fBconfigure\fP command and build PCRE2 in the usual way. +.P +Note that using \fBccache\fP (a caching C compiler) is incompatible with code +coverage reporting.
If you have configured \fBccache\fP to run automatically +on your system, you must set the environment variable +.sp + CCACHE_DISABLE=1 +.sp +before running \fBmake\fP to build PCRE2, so that \fBccache\fP is not used. +.P +When --enable-coverage is used, the following additional targets are added to the +\fIMakefile\fP: +.sp + make coverage +.sp +This creates a fresh coverage report for the PCRE2 test suite. It is equivalent +to running "make coverage-reset", "make coverage-baseline", "make check", and +then "make coverage-report". +.sp + make coverage-reset +.sp +This zeroes the coverage counters, but does nothing else. +.sp + make coverage-baseline +.sp +This captures baseline coverage information. +.sp + make coverage-report +.sp +This creates the coverage report. +.sp + make coverage-clean-report +.sp +This removes the generated coverage report without cleaning the coverage data +itself. +.sp + make coverage-clean-data +.sp +This removes the captured coverage data without removing the coverage files +created at compile time (*.gcno). +.sp + make coverage-clean +.sp +This cleans all coverage data including the generated coverage report. For more +information about code coverage, see the \fBgcov\fP and \fBlcov\fP +documentation. +. +. +.SH "DISABLING THE Z AND T FORMATTING MODIFIERS" +.rs +.sp +The C99 standard defines formatting modifiers z and t for size_t and +ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in +environments other than old versions of Microsoft Visual Studio when +__STDC_VERSION__ is defined and has a value greater than or equal to 199901L +(indicating support for C99). +However, there is at least one environment that claims to be C99 but does not +support these modifiers. If +.sp + --disable-percent-zt +.sp +is specified, no use is made of the z or t modifiers. Instead of %td or %zu, +a suitable format is used depending on the size of long for the platform. +. +. 
+.SH "SUPPORT FOR FUZZERS" +.rs +.sp +There is a special option for use by people who want to run fuzzing tests on +PCRE2: +.sp + --enable-fuzz-support +.sp +At present this applies only to the 8-bit library. If set, it causes an extra +library called libpcre2-fuzzsupport.a to be built, but not installed. This +contains a single function called LLVMFuzzerTestOneInput() whose arguments are +a pointer to a string and the length of the string. When called, this function +tries to compile the string as a pattern, and if that succeeds, to match it. +This is done both with no options and with some random options bits that are +generated from the string. +.P +Setting --enable-fuzz-support also causes a binary called \fBpcre2fuzzcheck\fP +to be created. This is normally run under valgrind or used when PCRE2 is +compiled with address sanitizing enabled. It calls the fuzzing function and +outputs information about what it is doing. The input strings are specified by +arguments: if an argument starts with "=" the rest of it is a literal input +string. Otherwise, it is assumed to be a file name, and the contents of the +file are the test string. +. +. +.SH "OBSOLETE OPTION" +.rs +.sp +In versions of PCRE2 prior to 10.30, there were two ways of handling +backtracking in the \fBpcre2_match()\fP function. The default was to use the +system stack, but if +.sp + --disable-stack-for-recursion +.sp +was set, memory on the heap was used. From release 10.30 onwards this has +changed (the stack is no longer used) and this option now does nothing except +give a warning. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2api\fP(3), \fBpcre2-config\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 16 April 2024 +Copyright (c) 1997-2024 University of Cambridge. 
+.fi diff --git a/3rd/pcre2/doc/pcre2callout.3 b/3rd/pcre2/doc/pcre2callout.3 new file mode 100644 index 00000000..ee04fbca --- /dev/null +++ b/3rd/pcre2/doc/pcre2callout.3 @@ -0,0 +1,457 @@ +.TH PCRE2CALLOUT 3 "19 January 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH SYNOPSIS +.rs +.sp +.B #include <pcre2.h> +.PP +.SM +.nf +.B int (*pcre2_callout)(pcre2_callout_block *, void *); +.sp +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.fi +. +.SH DESCRIPTION +.rs +.sp +PCRE2 provides a feature called "callout", which is a means of temporarily +passing control to the caller of PCRE2 in the middle of pattern matching. The +caller of PCRE2 provides an external function by putting its entry point in +a match context (see \fBpcre2_set_callout()\fP in the +.\" HREF +\fBpcre2api\fP +.\" +documentation). +.P +When using the \fBpcre2_substitute()\fP function, an additional callout feature +is available. This does a callout after each change to the subject string and +is described in the +.\" HREF +\fBpcre2api\fP +.\" +documentation; the rest of this document is concerned with callouts during +pattern matching. +.P +Within a regular expression, (?C) indicates a point at which the external +function is to be called. Different callout points can be identified by putting +a number less than 256 after the letter C. The default value is zero. +Alternatively, the argument may be a delimited string. The starting delimiter +must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the +start, except for {, where the ending delimiter is }. If the ending delimiter +is needed within the string, it must be doubled. 
For example, this pattern has +two callout points: +.sp + (?C1)abc(?C"some ""arbitrary"" text")def +.sp +If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2 +automatically inserts callouts, all with number 255, before each item in the +pattern except for immediately before or after an explicit callout. For +example, if PCRE2_AUTO_CALLOUT is used with the pattern +.sp + A(?C3)B +.sp +it is processed as if it were +.sp + (?C255)A(?C3)B(?C255) +.sp +Here is a more complicated example: +.sp + A(\ed{2}|--) +.sp +With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were +.sp + (?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255) +.sp +Notice that there is a callout before and after each parenthesis and +alternation bar. If the pattern contains a conditional group whose condition is +an assertion, an automatic callout is inserted immediately before the +condition. Such a callout may also be inserted explicitly, for example: +.sp + (?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de) +.sp +This applies only to assertion conditions (because they are themselves +independent groups). +.P +Callouts can be useful for tracking the progress of pattern matching. The +.\" HREF +\fBpcre2test\fP +.\" +program has a pattern qualifier (/auto_callout) that sets automatic callouts. +When any callouts are present, the output from \fBpcre2test\fP indicates how +the pattern is being matched. This is useful information when you are trying to +optimize the performance of a particular pattern. +. +. +.SH "MISSING CALLOUTS" +.rs +.sp +You should be aware that, because of optimizations in the way PCRE2 compiles +and matches patterns, callouts sometimes do not happen exactly as you might +expect. +. +. +.SS "Auto-possessification" +.rs +.sp +At compile time, PCRE2 "auto-possessifies" repeated items when it knows that +what follows cannot be part of the repeat. For example, a+[bc] is compiled as +if it were a++[bc]. 
The \fBpcre2test\fP output when this pattern is compiled +with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string +"aaaa" is: +.sp + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + No match +.sp +This indicates that when matching [bc] fails, there is no backtracking into a+ +(because it is being treated as a++) and therefore the callouts that would be +taken for the backtracks do not occur. You can disable the auto-possessify +feature by passing PCRE2_NO_AUTO_POSSESS to \fBpcre2_compile()\fP, or starting +the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this: +.sp + --->aaaa + +0 ^ a+ + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^ ^ [bc] + +2 ^^ [bc] + No match +.sp +This time, when matching [bc] fails, the matcher backtracks into a+ and tries +again, repeatedly, until a+ itself fails. +. +. +.SS "Automatic .* anchoring" +.rs +.sp +By default, an optimization is applied when .* is the first significant item in +a pattern. If PCRE2_DOTALL is set, so that the dot can match any character, the +pattern is automatically anchored. If PCRE2_DOTALL is not set, a match can +start only after an internal newline or at the beginning of the subject, and +\fBpcre2_compile()\fP remembers this. If a pattern has more than one top-level +branch, automatic anchoring occurs if all branches are anchorable. +.P +This optimization is disabled, however, if .* is in an atomic group or if there +is a backreference to the capture group in which it appears. It is also +disabled if the pattern contains (*PRUNE) or (*SKIP). However, the presence of +callouts does not affect it. +.P +For example, if the pattern .*\ed is compiled with PCRE2_AUTO_CALLOUT and +applied to the string "aa", the \fBpcre2test\fP output is: +.sp + --->aa + +0 ^ .* + +2 ^ ^ \ed + +2 ^^ \ed + +2 ^ \ed + No match +.sp +This shows that all match attempts start at the beginning of the subject. In +other words, the pattern is anchored. 
You can disable this optimization by +passing PCRE2_NO_DOTSTAR_ANCHOR to \fBpcre2_compile()\fP, or starting the +pattern with (*NO_DOTSTAR_ANCHOR). In this case, the output changes to: +.sp + --->aa + +0 ^ .* + +2 ^ ^ \ed + +2 ^^ \ed + +2 ^ \ed + +0 ^ .* + +2 ^^ \ed + +2 ^ \ed + No match +.sp +This shows more match attempts, starting at the second subject character. +Another optimization, described in the next section, means that there is no +subsequent attempt to match with an empty subject. +. +. +.SS "Other optimizations" +.rs +.sp +Other optimizations that provide fast "no match" results also affect callouts. +For example, if the pattern is +.sp + ab(?C4)cd +.sp +PCRE2 knows that any matching string must contain the letter "d". If the +subject string is "abyz", the lack of "d" means that matching doesn't ever +start, and the callout is never reached. However, with "abyd", though the +result is still no match, the callout is obeyed. +.P +For most patterns PCRE2 also knows the minimum length of a matching string, and +will immediately give a "no match" return without actually running a match if +the subject is not long enough, or, for unanchored patterns, if it has been +scanned far enough. +.P +You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE +option to \fBpcre2_compile()\fP, or by starting the pattern with +(*NO_START_OPT). This slows down the matching process, but does ensure that +callouts such as the example above are obeyed. +. +. +.\" HTML +.SH "THE CALLOUT INTERFACE" +.rs +.sp +During matching, when PCRE2 reaches a callout point, if an external function is +provided in the match context, it is called. This applies to both normal, +DFA, and JIT matching. The first argument to the callout function is a pointer +to a \fBpcre2_callout\fP block. The second argument is the void * callout data +that was supplied when the callout was set up by calling +\fBpcre2_set_callout()\fP (see the +.\" HREF +\fBpcre2api\fP +.\" +documentation). 
The callout block structure contains the following fields, not +necessarily in this order: +.sp + uint32_t \fIversion\fP; + uint32_t \fIcallout_number\fP; + uint32_t \fIcapture_top\fP; + uint32_t \fIcapture_last\fP; + uint32_t \fIcallout_flags\fP; + PCRE2_SIZE *\fIoffset_vector\fP; + PCRE2_SPTR \fImark\fP; + PCRE2_SPTR \fIsubject\fP; + PCRE2_SIZE \fIsubject_length\fP; + PCRE2_SIZE \fIstart_match\fP; + PCRE2_SIZE \fIcurrent_position\fP; + PCRE2_SIZE \fIpattern_position\fP; + PCRE2_SIZE \fInext_item_length\fP; + PCRE2_SIZE \fIcallout_string_offset\fP; + PCRE2_SIZE \fIcallout_string_length\fP; + PCRE2_SPTR \fIcallout_string\fP; +.sp +The \fIversion\fP field contains the version number of the block format. The +current version is 2; the three callout string fields were added for version 1, +and the \fIcallout_flags\fP field for version 2. If you are writing an +application that might use an earlier release of PCRE2, you should check the +version number before accessing any of these fields. The version number will +increase in future if more fields are added, but the intention is never to +remove any of the existing fields. +. +. +.SS "Fields for numerical callouts" +.rs +.sp +For a numerical callout, \fIcallout_string\fP is NULL, and \fIcallout_number\fP +contains the number of the callout, in the range 0-255. This is the number +that follows (?C for callouts that are part of the pattern; it is 255 for +automatically generated callouts. +. +. +.SS "Fields for string callouts" +.rs +.sp +For callouts with string arguments, \fIcallout_number\fP is always zero, and +\fIcallout_string\fP points to the string that is contained within the compiled +pattern. Its length is given by \fIcallout_string_length\fP. Duplicated ending +delimiters that were present in the original pattern string have been turned +into single characters, but there is no other processing of the callout string +argument. 
An additional code unit containing binary zero is present after the +string, but is not included in the length. The delimiter that was used to start +the string is also stored within the pattern, immediately before the string +itself. You can access this delimiter as \fIcallout_string\fP[-1] if you need +it. +.P +The \fIcallout_string_offset\fP field is the code unit offset to the start of +the callout argument string within the original pattern string. This is +provided for the benefit of applications such as script languages that might +need to report errors in the callout string within the pattern. +. +. +.SS "Fields for all callouts" +.rs +.sp +The remaining fields in the callout block are the same for both kinds of +callout. +.P +The \fIoffset_vector\fP field is a pointer to a vector of capturing offsets +(the "ovector"). You may read the elements in this vector, but you must not +change any of them. +.P +For calls to \fBpcre2_match()\fP, the \fIoffset_vector\fP field is not (since +release 10.30) a pointer to the actual ovector that was passed to the matching +function in the match data block. Instead it points to an internal ovector of a +size large enough to hold all possible captured substrings in the pattern. Note +that whenever a recursion or subroutine call within a pattern completes, the +capturing state is reset to what it was before. +.P +The \fIcapture_last\fP field contains the number of the most recently captured +substring, and the \fIcapture_top\fP field contains one more than the number of +the highest numbered captured substring so far. If no substrings have yet been +captured, the value of \fIcapture_last\fP is 0 and the value of +\fIcapture_top\fP is 1. The values of these fields do not always differ by one; +for example, when the callout in the pattern ((a)(b))(?C2) is taken, +\fIcapture_last\fP is 1 but \fIcapture_top\fP is 4. 
+.P +The contents of ovector[2] to ovector[\fIcapture_top\fP*2-1] can be inspected in +order to extract substrings that have been matched so far, in the same way as +extracting substrings after a match has completed. The values in ovector[0] and +ovector[1] are always PCRE2_UNSET because the match is by definition not +complete. Substrings that have not been captured but whose numbers are less +than \fIcapture_top\fP also have both of their ovector slots set to +PCRE2_UNSET. +.P +For DFA matching, the \fIoffset_vector\fP field points to the ovector that was +passed to the matching function in the match data block for callouts at the top +level, but to an internal ovector during the processing of pattern recursions, +lookarounds, and atomic groups. However, these ovectors hold no useful +information because \fBpcre2_dfa_match()\fP does not support substring +capturing. The value of \fIcapture_top\fP is always 1 and the value of +\fIcapture_last\fP is always 0 for DFA matching. +.P +The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values +that were passed to the matching function. +.P +The \fIstart_match\fP field normally contains the offset within the subject at +which the current match attempt started. However, if the escape sequence \eK +has been encountered, this value is changed to reflect the modified starting +point. If the pattern is not anchored, the callout function may be called +several times from the same point in the pattern for different starting points +in the subject. +.P +The \fIcurrent_position\fP field contains the offset within the subject of the +current match pointer. +.P +The \fIpattern_position\fP field contains the offset in the pattern string to +the next item to be matched. +.P +The \fInext_item_length\fP field contains the length of the next item to be +processed in the pattern string. When the callout is at the end of the pattern, +the length is zero. 
When the callout precedes an opening parenthesis, the +length includes meta characters that follow the parenthesis. For example, in a +callout before an assertion such as (?=ab) the length is 3. For an alternation +bar or a closing parenthesis, the length is one, unless a closing parenthesis +is followed by a quantifier, in which case its length is included. (This +changed in release 10.23. In earlier releases, before an opening parenthesis +the length was that of the entire group, and before an alternation bar or a +closing parenthesis the length was zero.) +.P +The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to +help in distinguishing between different automatic callouts, which all have the +same callout number. However, they are set for all callouts, and are used by +\fBpcre2test\fP to show the next item to be matched when displaying callout +information. +.P +In callouts from \fBpcre2_match()\fP the \fImark\fP field contains a pointer to +the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or +(*THEN) item in the match, or NULL if no such items have been passed. Instances +of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In +callouts from the DFA matching function this field always contains NULL. +.P +The \fIcallout_flags\fP field is always zero in callouts from +\fBpcre2_dfa_match()\fP or when JIT is being used. When \fBpcre2_match()\fP +without JIT is used, the following bits may be set: +.sp + PCRE2_CALLOUT_STARTMATCH +.sp +This is set for the first callout after the start of matching for each new +starting position in the subject. +.sp + PCRE2_CALLOUT_BACKTRACK +.sp +This is set if there has been a matching backtrack since the previous callout, +or since the start of matching if this is the first callout from a +\fBpcre2_match()\fP run. +.P +Both bits are set when a backtrack has caused a "bumpalong" to a new starting +position in the subject. 
Output from \fBpcre2test\fP does not indicate the +presence of these bits unless the \fBcallout_extra\fP modifier is set. +.P +The information in the \fBcallout_flags\fP field is provided so that +applications can track and tell their users how matching with backtracking is +done. This can be useful when trying to optimize patterns, or just to +understand how PCRE2 works. There is no support in \fBpcre2_dfa_match()\fP +because there is no backtracking in DFA matching, and there is no support in +JIT because JIT is all about maximizing matching performance. In both these +cases the \fBcallout_flags\fP field is always zero. +. +. +.SH "RETURN VALUES FROM CALLOUTS" +.rs +.sp +The external callout function returns an integer to PCRE2. If the value is +zero, matching proceeds as normal. If the value is greater than zero, matching +fails at the current point, but the testing of other matching possibilities +goes ahead, just as if a lookahead assertion had failed. If the value is less +than zero, the match is abandoned, and the matching function returns the +negative value. +.P +Negative values should normally be chosen from the set of PCRE2_ERROR_xxx +values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match" +failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout +functions; it will never be used by PCRE2 itself. +. +. +.SH "CALLOUT ENUMERATION" +.rs +.sp +.nf +.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, +.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *)," +.B " void *\fIuser_data\fP);" +.fi +.sp +A script language that supports the use of string arguments in callouts might +like to scan all the callouts in a pattern before running the match. This can +be done by calling \fBpcre2_callout_enumerate()\fP. The first argument is a +pointer to a compiled pattern, the second points to a callback function, and +the third is arbitrary user data. 
The callback function is called for every +callout in the pattern in the order in which they appear. Its first argument is +a pointer to a callout enumeration block, and its second argument is the +\fIuser_data\fP value that was passed to \fBpcre2_callout_enumerate()\fP. The +data block contains the following fields: +.sp + \fIversion\fP Block version number + \fIpattern_position\fP Offset to next item in pattern + \fInext_item_length\fP Length of next item in pattern + \fIcallout_number\fP Number for numbered callouts + \fIcallout_string_offset\fP Offset to string within pattern + \fIcallout_string_length\fP Length of callout string + \fIcallout_string\fP Points to callout string or is NULL +.sp +The version number is currently 0. It will increase if new fields are ever +added to the block. The remaining fields are the same as their namesakes in the +\fBpcre2_callout\fP block that is used for callouts during matching, as +described +.\" HTML +.\" +above. +.\" +.P +Note that the value of \fIpattern_position\fP is unique for each callout. +However, if a callout occurs inside a group that is quantified with a non-zero +minimum or a fixed maximum, the group is replicated inside the compiled +pattern. For example, a pattern such as /(a){2}/ is compiled as if it were +/(a)(a)/. This means that the callout will be enumerated more than once, but +with the same value for \fIpattern_position\fP in each case. +.P +The callback function should normally return zero. If it returns a non-zero +value, scanning the pattern stops, and that value is returned from +\fBpcre2_callout_enumerate()\fP. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 19 January 2024 +Copyright (c) 1997-2024 University of Cambridge. 
+.fi diff --git a/3rd/pcre2/doc/pcre2compat.3 b/3rd/pcre2/doc/pcre2compat.3 new file mode 100644 index 00000000..281c7e59 --- /dev/null +++ b/3rd/pcre2/doc/pcre2compat.3 @@ -0,0 +1,256 @@ +.TH PCRE2COMPAT 3 "02 October 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "DIFFERENCES BETWEEN PCRE2 AND PERL" +.rs +.sp +This document describes some of the known differences in the ways that PCRE2 +and Perl handle regular expressions. The differences described here are with +respect to Perl version 5.38.0, but as both Perl and PCRE2 are continually +changing, the information may at times be out of date. +.P +1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +.P +2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +have are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. +.P +3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +they do not mean what you might think. For example, (?!a){3} does not assert +that the next three characters are not "a". It just asserts that the next +character is not "a" three times (in principle; PCRE2 optimizes this to run the +assertion just once). Perl allows some repeat quantifiers on other assertions, +for example, \eb* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. +.P +4. 
If a braced quantifier such as {1,2} appears where there is nothing to +repeat (for example, at the start of a branch), PCRE2 raises an error whereas +Perl treats the quantifier characters as literal. +.P +5. Capture groups that occur inside negative lookaround assertions are counted, +but their entries in the offsets vector are set only when a negative assertion +is a condition that has a matching branch (that is, the condition is false). +Perl may set such capture groups in other circumstances. +.P +6. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu, +\eU, and \eN when followed by a character name. \eN on its own, matching a +non-newline character, and \eN{U+dd..}, matching a Unicode code point, are +supported. The escapes that modify the case of following letters are +implemented by Perl's general string-handling and are not part of its pattern +matching engine. If any of these are encountered by PCRE2, an error is +generated by default. However, if either of the PCRE2_ALT_BSUX or +PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript +interprets them. +.P +7. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is +built with Unicode support (the default). The properties that can be tested +with \ep and \eP are limited to the general category properties such as Lu and +Nd, the derived properties Any and Lc (synonym L&), script names such as Greek +or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and +Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See +the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation for details. The long synonyms for property names that Perl +supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted +to prefix any of these properties with "Is". +.P +8. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters +in between are treated as literals. 
However, this is slightly different from +Perl in that $ and @ are also handled as literals inside the quotes. In Perl, +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \eQ +and \eE which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \eQ and \eE just like any other character. Note the +following examples: +.sp + Pattern PCRE2 matches Perl matches +.sp +.\" JOIN + \eQabc$xyz\eE abc$xyz abc followed by the + contents of $xyz + \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz + \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz + \eQA\eB\eE A\eB A\eB + \eQ\e\eE \e \e\eE +.sp +The \eQ...\eE sequence is recognized both inside and outside character classes +by both PCRE2 and Perl. Another difference from Perl is that any appearance of +\eQ or \eE inside what might otherwise be a quantifier causes PCRE2 not to +recognize the sequence as a quantifier. Perl recognizes a quantifier if +(redundantly) either of the numbers is inside \eQ...\eE, but not if the +separating comma is. When not recognized as a quantifier a sequence such as +{\eQ1\eE,2} is treated as the literal string "{1,2}". +.P +9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +constructions. However, PCRE2 does have a "callout" feature, which allows an +external function to be called during pattern matching. See the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details. +.P +10. Subroutine calls (whether recursive or not) were treated as atomic groups +up to PCRE2 release 10.23, but from release 10.30 this changed, and +backtracking into subroutine calls is now supported, as in Perl. +.P +11. In PCRE2, if any of the backtracking control verbs are used in a group that +is called as a subroutine (whether or not recursively), their effect is +confined to that group; it does not extend to the surrounding pattern. This is +not always the case in Perl. 
In particular, if (*THEN) is present in a group +that is called as a subroutine, its action is limited to that group, even if +the group does not contain any | characters. Note that such groups are +processed as anchored at the point where they are tested. PCRE2 also confines +all control verbs within atomic assertions, again including (*THEN) in +assertions with only one branch. +.P +12. If a pattern contains more than one backtracking control verb, the first +one that is backtracked onto acts. For example, in the pattern +A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C +triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the +same as PCRE2, but there are cases where it differs. +.P +13. There are some differences that are concerned with the settings of captured +strings when part of a pattern is repeated. For example, matching "aba" against +the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to +"b". +.P +14. PCRE2's handling of duplicate capture group numbers and names is not as +general as Perl's. This is a consequence of the fact that PCRE2 works internally +just with numbers, using an external table to translate between numbers and +names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two +capture groups have the same number but different names, is not supported, and +causes an error at compile time. If it were allowed, it would not be possible +to distinguish which group matched, because both names map to capture group +number 1. To avoid this confusing situation, an error is given at compile time. +.P +15. Perl used to recognize comments in some places that PCRE2 does not, for +example, between the ( and ? at the start of a group. If the /x modifier is +set, Perl allowed white space between ( and ? though the latest Perls give an +error (for a while it was just deprecated). There may still be some cases where +Perl behaves differently. +.P +16. 
Perl, when in warning mode, gives warnings for character classes such as +[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no +warning features, so it gives an error in these cases because they are almost +certainly user mistakes. +.P +17. In PCRE2, until release 10.45, the upper/lower case character properties Lu +and Ll were not affected when case-independent matching was specified. Perl has +changed in this respect, and PCRE2 has now changed to match. When caseless +matching is in force, Lu, Ll, and Lt (title case) are all treated as Lc (cased +letter). +.P +18. From release 5.32.0, Perl locks out the use of \eK in lookaround +assertions. From release 10.38 PCRE2 does the same by default. However, there +is an option for re-enabling the previous behaviour. When this option is set, +\eK is acted on when it occurs in positive assertions, but is ignored in +negative assertions. +.P +19. PCRE2 provides some extensions to the Perl regular expression facilities. +Perl 5.10 included new features that were not in earlier versions of Perl, some +of which (such as named parentheses) were in PCRE2 for some time before. This +list is with respect to Perl 5.38: +.sp +(a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $ +meta-character matches only at the very end of the string. +.sp +(b) A backslash followed by a letter with no special meaning is faulted. (Perl +can be made to issue a warning.) +.sp +(c) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is +inverted, that is, by default they are not greedy, but if followed by a +question mark they are. +.sp +(d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried +only at the first matching position in the subject string. +.sp +(e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART +options have no Perl equivalents. 
+.sp +(f) The \eR escape sequence can be restricted to match only CR, LF, or CRLF +by the PCRE2_BSR_ANYCRLF option. +.sp +(g) The callout facility is PCRE2-specific. Perl supports codeblocks and +variable interpolation, but not general hooks on every match. +.sp +(h) The partial matching facility is PCRE2-specific. +.sp +(i) The alternative matching function (\fBpcre2_dfa_match()\fP) matches in a +different way and is not Perl-compatible. +.sp +(j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at +the start of a pattern. These set overall options that cannot be changed within +the pattern. +.sp +(k) PCRE2 supports non-atomic positive lookaround assertions. This is an +extension to the lookaround facilities. The default, Perl-compatible +lookarounds are atomic. +.sp +(l) There are three syntactical items in patterns that can refer to a capturing +group by number: back references such as \eg{2}, subroutine calls such as (?3), +and condition references such as (?(4)...). PCRE2 supports relative group +numbers such as +2 and -4 in all three cases. Perl supports both plus and minus +for subroutine calls, but only minus for back references, and no relative +numbering at all for conditions. +.sp +(m) The scan substring assertion (syntax (*scs:(n)...)) is a PCRE2 extension +that is not available in Perl. +.P +20. Perl has different limits than PCRE2. See the +.\" HREF +\fBpcre2limit\fP +.\" +documentation for details. With release 5.10, Perl switched from recursion to iteration, +keeping the intermediate matches on the heap, which is ~10% slower but does not +fall into any stack-overflow limit. PCRE2 made a similar change at release +10.30, and also has many build-time and run-time customizable limits. +.P +21. Unlike Perl, PCRE2 doesn't have character set modifiers and especially no way +to set characters by context just like Perl's "/d". 
A regular expression using +PCRE2_UTF and PCRE2_UCP will use similar rules to Perl's "/u"; something closer +to "/a" could be selected by adding other PCRE2_EXTRA_ASCII* options on top. +.P +22. Some recursive patterns that Perl diagnoses as infinite recursions can be +handled by PCRE2, either by the interpreter or the JIT. An example is +/(?:|(?0)abcd)(?(R)|\ez)/, which matches a sequence of any number of repeated +"abcd" substrings at the end of the subject. +.P +23. Both PCRE2 and Perl error when \ex{ escapes are invalid, but Perl tries to +recover and prints a warning if the problem was that an invalid hexadecimal +digit was found; since PCRE2 doesn't have warnings, it returns an error instead. +Additionally, Perl accepts \ex{} and generates NUL, unlike PCRE2. +.P +24. From release 10.45, PCRE2 gives an error if \ex is not followed by a +hexadecimal digit or a curly bracket. It used to interpret this as the NUL +character. Perl still generates NUL, but warns when in warning mode in most +cases. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 02 October 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2convert.3 b/3rd/pcre2/doc/pcre2convert.3 new file mode 100644 index 00000000..9f07f51d --- /dev/null +++ b/3rd/pcre2/doc/pcre2convert.3 @@ -0,0 +1,164 @@ +.TH PCRE2CONVERT 3 "14 November 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "EXPERIMENTAL PATTERN CONVERSION FUNCTIONS" +.rs +.sp +This document describes a set of functions that can be used to convert +"foreign" patterns into PCRE2 regular expressions. This facility is currently +experimental, and may be changed in future releases. Two kinds of pattern, +globs and POSIX patterns, are supported. +. +. 
+.SH "THE CONVERT CONTEXT" +.rs +.sp +.nf +.B pcre2_convert_context *pcre2_convert_context_create( +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B pcre2_convert_context *pcre2_convert_context_copy( +.B " pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_convert_context_free(pcre2_convert_context *\fIcvcontext\fP); +.sp +.B int pcre2_set_glob_escape(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIescape_char\fP);" +.sp +.B int pcre2_set_glob_separator(pcre2_convert_context *\fIcvcontext\fP, +.B " uint32_t \fIseparator_char\fP);" +.fi +.sp +A convert context is used to hold parameters that affect the way that pattern +conversion works. Like all PCRE2 contexts, you need to use a context only if +you want to override the defaults. There are the usual create, copy, and free +functions. If custom memory management functions are set in a general context +that is passed to \fBpcre2_convert_context_create()\fP, they are used for all +memory management within the conversion functions. +.P +There are only two parameters in the convert context at present. Both apply +only to glob conversions. The escape character defaults to grave accent under +Windows, otherwise backslash. It can be set to zero, meaning no escape +character, or to any punctuation character with a code point less than 256. +The separator character defaults to backslash under Windows, otherwise forward +slash. It can be set to forward slash, backslash, or dot. +.P +The two setting functions return zero on success, or PCRE2_ERROR_BADDATA if +their second argument is invalid. +. +. 
+.SH "THE CONVERSION FUNCTION" +.rs +.sp +.nf +.B int pcre2_pattern_convert(PCRE2_SPTR \fIpattern\fP, PCRE2_SIZE \fIlength\fP, +.B " uint32_t \fIoptions\fP, PCRE2_UCHAR **\fIbuffer\fP," +.B " PCRE2_SIZE *\fIblength\fP, pcre2_convert_context *\fIcvcontext\fP);" +.sp +.B void pcre2_converted_pattern_free(PCRE2_UCHAR *\fIconverted_pattern\fP); +.fi +.sp +The first two arguments of \fBpcre2_pattern_convert()\fP define the foreign +pattern that is to be converted. The length may be given as +PCRE2_ZERO_TERMINATED. The \fBoptions\fP argument defines how the pattern is to +be processed. If the input is UTF, the PCRE2_CONVERT_UTF option should be set. +PCRE2_CONVERT_NO_UTF_CHECK may also be set if you are sure the input is valid. +One or more of the glob options, or one of the following POSIX options must be +set to define the type of conversion that is required: +.sp + PCRE2_CONVERT_GLOB + PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR + PCRE2_CONVERT_GLOB_NO_STARSTAR + PCRE2_CONVERT_POSIX_BASIC + PCRE2_CONVERT_POSIX_EXTENDED +.sp +Details of the conversions are given below. The \fBbuffer\fP and \fBblength\fP +arguments define how the output is handled: +.P +If \fBbuffer\fP is NULL, the function just returns the length of the converted +pattern via \fBblength\fP. This is one less than the length of buffer needed, +because a terminating zero is always added to the output. +.P +If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using +the allocator in the context or \fBmalloc()\fP if no context is supplied. A +pointer to this buffer is placed in the variable to which \fBbuffer\fP points. +When no longer needed the output buffer must be freed by calling +\fBpcre2_converted_pattern_free()\fP. If this function is called with a NULL +argument, it returns immediately without doing anything. +.P +If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the +actual length of the buffer provided (in code units). 
+.P +In all cases, after successful conversion, the variable pointed to by +\fBblength\fP is updated to the length actually used (in code units), excluding +the terminating zero that is always added. +.P +If an error occurs, the length (via \fBblength\fP) is set to the offset +within the input pattern where the error was detected. Only gross syntax errors +are caught; there are plenty of errors that will get passed on for +\fBpcre2_compile()\fP to discover. +.P +The return from \fBpcre2_pattern_convert()\fP is zero on success or a non-zero +PCRE2 error code. Note that PCRE2 error codes may be positive or negative: +\fBpcre2_compile()\fP uses mostly positive codes and \fBpcre2_match()\fP +negative ones; \fBpcre2_pattern_convert()\fP uses existing codes of both kinds. A +textual error message can be obtained by calling +\fBpcre2_get_error_message()\fP. +. +. +.SH "CONVERTING GLOBS" +.rs +.sp +Globs are used to match file names, and consequently have the concept of a +"path separator", which defaults to backslash under Windows and forward slash +otherwise. If PCRE2_CONVERT_GLOB is set, the wildcards * and ? are not +permitted to match separator characters, but the double-star (**) feature +(which does match separators) is supported. +.P +PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR matches globs with wildcards allowed to +match separator characters. PCRE2_CONVERT_GLOB_NO_STARSTAR matches globs with +the double-star feature disabled. These options may be given together. +. +. +.SH "CONVERTING POSIX PATTERNS" +.rs +.sp +POSIX defines two kinds of regular expression pattern: basic and extended. +These can be processed by setting PCRE2_CONVERT_POSIX_BASIC or +PCRE2_CONVERT_POSIX_EXTENDED, respectively. +.P +In POSIX patterns, backslash is not special in a character class. Unmatched +closing parentheses are treated as literals. +.P +In basic patterns, ? + | {} and () must be escaped to be recognized +as metacharacters outside a character class. 
If the first character in the +pattern is * it is treated as a literal. ^ is a metacharacter only at the start +of a branch. +.P +In extended patterns, a backslash not in a character class always +makes the next character literal, whatever it is. There are no backreferences. +.P +Note: POSIX mandates that the longest possible match at the first matching +position must be found. This is not what \fBpcre2_match()\fP does; it yields +the first match that is found. An application can use \fBpcre2_dfa_match()\fP +to find the longest match, but that does not support backreferences (but then +neither do POSIX extended patterns). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 14 November 2023 +Copyright (c) 1997-2018 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2demo.3 b/3rd/pcre2/doc/pcre2demo.3 new file mode 100644 index 00000000..fc424357 --- /dev/null +++ b/3rd/pcre2/doc/pcre2demo.3 @@ -0,0 +1,526 @@ +.TH PCRE2DEMO 3 "31 August 2021" "PCRE2 10.45" +.\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT! +.SH NAME +PCRE2DEMO - A demonstration C program for PCRE2 +.SH "SOURCE CODE" +.rs +.sp +.\" Start example. +.de EX +. do ds mF \\n[.fam] +. nr mE \\n(.f +. nf +. nh +. do fam C +. ft CW +.. +. +. +.\" End example. +.de EE +. do fam \\*(mF +. ft \\n(mE +. fi +. hy \\n(HY +.. +. +.RS -7 +.EX +/************************************************* +* PCRE2 DEMONSTRATION PROGRAM * +*************************************************/ + +/* This is a demonstration program to illustrate a straightforward way of +using the PCRE2 regular expression library from a C program. See the +pcre2sample documentation for a short discussion ("man pcre2sample" if you have +the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is +incompatible with the original PCRE API. 
+ +There are actually three libraries, each supporting a different code unit +width. This demonstration program uses the 8-bit library. The default is to +process each code unit as a separate character, but if the pattern begins with +"(*UTF)", both it and the subject are treated as UTF-8 strings, where +characters may occupy multiple code units. + +In Unix-like environments, if PCRE2 is installed in your standard system +libraries, you should be able to compile this program using this command: + +cc -Wall pcre2demo.c -lpcre2-8 -o pcre2demo + +If PCRE2 is not installed in a standard place, it is likely to be installed +with support for the pkg-config mechanism. If you have pkg-config, you can +compile this program using this command: + +cc -Wall pcre2demo.c `pkg-config --cflags --libs libpcre2-8` -o pcre2demo + +If you do not have pkg-config, you may have to use something like this: + +cc -Wall pcre2demo.c -I/usr/local/include -L/usr/local/lib \e + -R/usr/local/lib -lpcre2-8 -o pcre2demo + +Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and +library files for PCRE2 are installed on your system. Only some operating +systems (Solaris is one) use the -R option. + +Building under Windows: + +If you want to statically link this program against a non-dll .a file, you must +define PCRE2_STATIC before including pcre2.h, so in this environment, uncomment +the following line. */ + +/* #define PCRE2_STATIC */ + +/* The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. +For a program that uses only one code unit width, setting it to 8, 16, or 32 +makes it possible to use generic function names such as pcre2_compile(). Note +that just changing 8 to 16 (for example) is not sufficient to convert this +program to process 16-bit characters. 
Even in a fully 16-bit environment, where +string-handling functions such as strcmp() and printf() work with 16-bit +characters, the code for handling the table of named substrings will still need +to be modified. */ + +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include <stdio.h> +#include <string.h> +#include <pcre2.h> + + +/************************************************************************** +* Here is the program. The API includes the concept of "contexts" for * +* setting up unusual interface requirements for compiling and matching, * +* such as custom memory managers and non-standard newline definitions. * +* This program does not do any of this, so it makes no use of contexts, * +* always passing NULL where a context could be given. * +**************************************************************************/ + +int main(int argc, char **argv) +{ +pcre2_code *re; +PCRE2_SPTR pattern; /* PCRE2_SPTR is a pointer to unsigned code units of */ +PCRE2_SPTR subject; /* the appropriate width (in this case, 8 bits). */ +PCRE2_SPTR name_table; + +int crlf_is_newline; +int errornumber; +int find_all; +int i; +int rc; +int utf8; + +uint32_t option_bits; +uint32_t namecount; +uint32_t name_entry_size; +uint32_t newline; + +PCRE2_SIZE erroroffset; +PCRE2_SIZE *ovector; +PCRE2_SIZE subject_length; + +pcre2_match_data *match_data; + + +/************************************************************************** +* First, sort out the command line. There is only one possible option at * +* the moment, "-g" to request repeated matching to find all occurrences, * +* like Perl's /g option. We set the variable find_all to a non-zero value * +* if the -g option is present. 
* +**************************************************************************/ + +find_all = 0; +for (i = 1; i < argc; i++) + { + if (strcmp(argv[i], "-g") == 0) find_all = 1; + else if (argv[i][0] == '-') + { + printf("Unrecognised option %s\en", argv[i]); + return 1; + } + else break; + } + +/* After the options, we require exactly two arguments, which are the pattern, +and the subject string. */ + +if (argc - i != 2) + { + printf("Exactly two arguments required: a regex and a subject string\en"); + return 1; + } + +/* Pattern and subject are char arguments, so they can be straightforwardly +cast to PCRE2_SPTR because we are working in 8-bit code units. The subject +length is cast to PCRE2_SIZE for completeness, though PCRE2_SIZE is in fact +defined to be size_t. */ + +pattern = (PCRE2_SPTR)argv[i]; +subject = (PCRE2_SPTR)argv[i+1]; +subject_length = (PCRE2_SIZE)strlen((char *)subject); + + +/************************************************************************* +* Now we are going to compile the regular expression pattern, and handle * +* any errors that are detected. * +*************************************************************************/ + +re = pcre2_compile( + pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + 0, /* default options */ + &errornumber, /* for error number */ + &erroroffset, /* for error offset */ + NULL); /* use default compile context */ + +/* Compilation failed: print the error message and exit. */ + +if (re == NULL) + { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); + printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, + buffer); + return 1; + } + + +/************************************************************************* +* If the compilation succeeded, we call PCRE2 again, in order to do a * +* pattern match against the subject string. This does just ONE match. If * +* further matching is needed, it will be done below. 
Before running the * +* match we must set up a match_data block for holding the result. Using * +* pcre2_match_data_create_from_pattern() ensures that the block is * +* exactly the right size for the number of capturing parentheses in the * +* pattern. If you need to know the actual size of a match_data block as * +* a number of bytes, you can find it like this: * +* * +* PCRE2_SIZE match_data_size = pcre2_get_match_data_size(match_data); * +*************************************************************************/ + +match_data = pcre2_match_data_create_from_pattern(re, NULL); + +/* Now run the match. */ + +rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + 0, /* start at offset 0 in the subject */ + 0, /* default options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + +/* Matching failed: handle error cases */ + +if (rc < 0) + { + switch(rc) + { + case PCRE2_ERROR_NOMATCH: printf("No match\en"); break; + /* + Handle other special cases if you like + */ + default: printf("Matching error %d\en", rc); break; + } + pcre2_match_data_free(match_data); /* Release memory used for the match */ + pcre2_code_free(re); /* data and the compiled pattern. */ + return 1; + } + +/* Match succeeded. Get a pointer to the output vector, where string offsets +are stored. */ + +ovector = pcre2_get_ovector_pointer(match_data); +printf("Match succeeded at offset %d\en", (int)ovector[0]); + + +/************************************************************************* +* We have found the first match within the subject string. If the output * +* vector wasn't big enough, say so. Then output any substrings that were * +* captured. * +*************************************************************************/ + +/* The output vector wasn't big enough. This should not happen, because we used +pcre2_match_data_create_from_pattern() above. 
*/ + +if (rc == 0) + printf("ovector was not big enough for all the captured substrings\en"); + +/* Since release 10.38 PCRE2 has locked out the use of \eK in lookaround +assertions. However, there is an option to re-enable the old behaviour. If that +is set, it is possible to run patterns such as /(?=.\eK)/ that use \eK in an +assertion to set the start of a match later than its end. In this demonstration +program, we show how to detect this case, but it shouldn't arise because the +option is never set. */ + +if (ovector[0] > ovector[1]) + { + printf("\e\eK was used in an assertion to set the match start after its end.\en" + "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\en"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + +/* Show substrings stored in the output vector by number. Obviously, in a real +application you might want to do things other than print them. */ + +for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + PCRE2_SIZE substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); + } + + +/************************************************************************** +* That concludes the basic part of this demonstration program. We have * +* compiled a pattern, and performed a single match. The code that follows * +* shows first how to access named substrings, and then how to code for * +* repeated matches on the same subject. * +**************************************************************************/ + +/* See if there are any named substrings, and if so, show them by name. First +we have to extract the count of named parentheses from the pattern. 
*/ + +(void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ + &namecount); /* where to put the answer */ + +if (namecount == 0) printf("No named substrings\en"); else + { + PCRE2_SPTR tabptr; + printf("Named substrings\en"); + + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void)pcre2_pattern_info( + re, /* the compiled pattern */ + PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. In the 8-bit library the number is held in two + bytes, most significant first. */ + + tabptr = name_table; + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + + +/************************************************************************* +* If the "-g" option was given on the command line, we want to continue * +* to search for additional matches in the subject string, in a similar * +* way to the /g option in Perl. This turns out to be trickier than you * +* might think because of the possibility of matching an empty string. * +* What happens is as follows: * +* * +* If the previous match was NOT for an empty string, we can just start * +* the next match at the end of the previous one. * +* * +* If the previous match WAS for an empty string, we can't do that, as it * +* would lead to an infinite loop. 
Instead, a call of pcre2_match() is * +* made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * +* first of these tells PCRE2 that an empty string at the start of the * +* subject is not a valid match; other possibilities must be tried. The * +* second flag restricts PCRE2 to one match attempt at the initial string * +* position. If this match succeeds, an alternative to the empty string * +* match has been found, and we can print it and proceed round the loop, * +* advancing by the length of whatever was found. If this match does not * +* succeed, we still stay in the loop, advancing by just one character. * +* In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * +* more than one byte. * +* * +* However, there is a complication concerned with newlines. When the * +* newline convention is such that CRLF is a valid newline, we must * +* advance by two characters rather than one. The newline convention can * +* be set in the regex by (*CR), etc.; if not, we must find the default. * +*************************************************************************/ + +if (!find_all) /* Check for -g */ + { + pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_code_free(re); /* for the match data and the pattern. */ + return 0; /* Exit the program. */ + } + +/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline +sequence. First, find the options with which the regex was compiled and extract +the UTF state. */ + +(void)pcre2_pattern_info(re, PCRE2_INFO_ALLOPTIONS, &option_bits); +utf8 = (option_bits & PCRE2_UTF) != 0; + +/* Now find the newline convention and see whether CRLF is a valid newline +sequence. 
*/ + +(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); +crlf_is_newline = newline == PCRE2_NEWLINE_ANY || + newline == PCRE2_NEWLINE_CRLF || + newline == PCRE2_NEWLINE_ANYCRLF; + +/* Loop for second and subsequent matches */ + +for (;;) + { + uint32_t options = 0; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) + { + if (ovector[0] == subject_length) break; + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /* If the previous match was not an empty string, there is one tricky case to + consider. If a pattern contains \eK within a lookbehind assertion at the + start, the end of the matched string can be at the offset where the match + started. Without special action, this leads to a loop that keeps on matching + the same substring. We must detect this case and arrange to move the start on + by one character. The pcre2_get_startchar() function returns the starting + offset that was passed to pcre2_match(). */ + + else + { + PCRE2_SIZE startchar = pcre2_get_startchar(match_data); + if (start_offset <= startchar) + { + if (startchar >= subject_length) break; /* Reached end of subject. */ + start_offset = startchar + 1; /* Advance by one character. */ + if (utf8) /* If UTF-8, it may be more */ + { /* than one code unit. 
*/ + for (; start_offset < subject_length; start_offset++) + if ((subject[start_offset] & 0xc0) != 0x80) break; + } + } + } + + /* Run the next matching operation */ + + rc = pcre2_match( + re, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + match_data, /* block for storing the result */ + NULL); /* use default match context */ + + /* This time, a result of NOMATCH isn't an error. If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. */ + + if (rc == PCRE2_ERROR_NOMATCH) + { + if (options == 0) break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one code unit */ + if (crlf_is_newline && /* If CRLF is a newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\er' && + subject[start_offset + 1] == '\en') + ovector[1] += 1; /* Advance by one more. */ + else if (utf8) /* Otherwise, ensure we */ + { /* advance a whole UTF-8 */ + while (ovector[1] < subject_length) /* character. */ + { + if ((subject[ovector[1]] & 0xc0) != 0x80) break; + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. 
*/ + + if (rc < 0) + { + printf("Matching error %d\en", rc); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* Match succeeded */ + + printf("\enMatch succeeded again at offset %d\en", (int)ovector[0]); + + /* The match succeeded, but the output vector wasn't big enough. This + should not happen. */ + + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\en"); + + /* We must guard against patterns such as /(?=.\eK)/ that use \eK in an + assertion to set the start of a match later than its end. In this + demonstration program, we just detect this case and give up. */ + + if (ovector[0] > ovector[1]) + { + printf("\e\eK was used in an assertion to set the match start after its end.\en" + "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), + (char *)(subject + ovector[1])); + printf("Run abandoned\en"); + pcre2_match_data_free(match_data); + pcre2_code_free(re); + return 1; + } + + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. 
*/ + + for (i = 0; i < rc; i++) + { + PCRE2_SPTR substring_start = subject + ovector[2*i]; + size_t substring_length = ovector[2*i+1] - ovector[2*i]; + printf("%2d: %.*s\en", i, (int)substring_length, (char *)substring_start); + } + + if (namecount == 0) printf("No named substrings\en"); else + { + PCRE2_SPTR tabptr = name_table; + printf("Named substrings\en"); + for (i = 0; i < namecount; i++) + { + int n = (tabptr[0] << 8) | tabptr[1]; + printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2, + (int)(ovector[2*n+1] - ovector[2*n]), subject + ovector[2*n]); + tabptr += name_entry_size; + } + } + } /* End of loop to find second and subsequent matches */ + +printf("\en"); +pcre2_match_data_free(match_data); +pcre2_code_free(re); +return 0; +} + +/* End of pcre2demo.c */ +.EE diff --git a/3rd/pcre2/doc/pcre2grep.1 b/3rd/pcre2/doc/pcre2grep.1 new file mode 100644 index 00000000..0b6212ef --- /dev/null +++ b/3rd/pcre2/doc/pcre2grep.1 @@ -0,0 +1,1027 @@ +.TH PCRE2GREP 1 "04 February 2025" "PCRE2 10.45" +.SH NAME +pcre2grep - a grep with Perl-compatible regular expressions. +.SH SYNOPSIS +.B pcre2grep [options] [long options] [pattern] [path1 path2 ...] +. +.SH DESCRIPTION +.rs +.sp +\fBpcre2grep\fP searches files for character patterns, in the same way as other +grep commands do, but it uses the PCRE2 regular expression library to support +patterns that are compatible with the regular expressions of Perl 5. See +.\" HREF +\fBpcre2syntax\fP(3) +.\" +for a quick-reference summary of pattern syntax, or +.\" HREF +\fBpcre2pattern\fP(3) +.\" +for a full description of the syntax and semantics of the regular expressions +that PCRE2 supports. +.P +Patterns, whether supplied on the command line or in a separate file, are given +without delimiters. For example: +.sp + pcre2grep Thursday /etc/motd +.sp +If you attempt to use delimiters (for example, by surrounding a pattern with +slashes, as is common in Perl scripts), they are interpreted as part of the +pattern. 
Quotes can of course be used to delimit patterns on the command line +because they are interpreted by the shell, and indeed quotes are required if a +pattern contains white space or shell metacharacters. +.P +The first argument that follows any option settings is treated as the single +pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present. +Conversely, when one or both of these options are used to specify patterns, all +arguments are treated as path names. At least one of \fB-e\fP, \fB-f\fP, or an +argument pattern must be provided. +.P +If no files are specified, \fBpcre2grep\fP reads the standard input. The +standard input can also be referenced by a name consisting of a single hyphen. +For example: +.sp + pcre2grep some-pattern file1 - file3 +.sp +By default, input files are searched line by line, so pattern assertions about +the beginning and end of a subject string (^, $, \eA, \eZ, and \ez) match at +the beginning and end of each line. When a line matches a pattern, it is copied +to the standard output, and if there is more than one file, the file name is +output at the start of each line, followed by a colon. However, there are +options that can change how \fBpcre2grep\fP behaves. For example, the \fB-M\fP +option makes it possible to search for strings that span line boundaries. What +defines a line boundary is controlled by the \fB-N\fP (\fB--newline\fP) option. +The \fB-h\fP and \fB-H\fP options control whether or not file names are shown, +and the \fB-Z\fP option changes the file name terminator to a zero byte. +.P +The amount of memory used for buffering files that are being scanned is +controlled by parameters that can be set by the \fB--buffer-size\fP and +\fB--max-buffer-size\fP options. The first of these sets the size of buffer +that is obtained at the start of processing. 
If an input file contains very +long lines, a larger buffer may be needed; this is handled by automatically +extending the buffer, up to the limit specified by \fB--max-buffer-size\fP. The +default values for these parameters can be set when \fBpcre2grep\fP is +built; if nothing is specified, the defaults are set to 20KiB and 1MiB +respectively. An error occurs if a line is too long and the buffer can no +longer be expanded. +.P +The block of memory that is actually used is three times the "buffer size", to +allow for buffering "before" and "after" lines. If the buffer size is too +small, fewer than requested "before" and "after" lines may be output. +.P +When matching with a multiline pattern, the size of the buffer must be at least +half of the maximum match expected or the pattern might fail to match. +.P +Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater. +BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern +(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to +each line in the order in which they are defined, except that all the \fB-e\fP +patterns are tried before the \fB-f\fP patterns. +.P +By default, as soon as one pattern matches a line, no further patterns are +considered. However, if \fB--colour\fP (or \fB--color\fP) is used to colour the +matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, +\fB--line-offsets\fP, or \fB--output\fP is used to output only the part of the +line that matched (either shown literally, or as an offset), the behaviour is +different. In this situation, all the patterns are applied to the line. If +there is more than one match, the one that begins nearest to the start of the +subject is processed; if there is more than one match at that position, the one +with the longest matching substring is processed; if the matching substrings +are equal, the first match found is processed.
+.P +Scanning with all the patterns resumes immediately following the match, so that +later matches on the same line can be found. Note, however, that an overlapping +match that starts in the middle of another match will not be processed. +.P +The above behaviour was changed at release 10.41 to be more compatible with GNU +grep. In earlier releases, \fBpcre2grep\fP did not recognize matches from +later patterns that were earlier in the subject. +.P +Patterns that can match an empty string are accepted, but empty string +matches are never recognized. An example is the pattern "(super)?(man)?", in +which all components are optional. This pattern finds all occurrences of both +"super" and "man"; the output differs from matching with "super|man" when only +the matching substrings are being shown. +.P +If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set, +\fBpcre2grep\fP uses the value to set a locale when calling the PCRE2 library. +The \fB--locale\fP option can be used to override this. +. +. +.SH "SUPPORT FOR COMPRESSED FILES" +.rs +.sp +Compile-time options for \fBpcre2grep\fP can set it up to use \fBlibz\fP or +\fBlibbz2\fP for reading compressed files whose names end in \fB.gz\fP or +\fB.bz2\fP, respectively. You can find out whether your \fBpcre2grep\fP binary +has support for one or both of these file types by running it with the +\fB--help\fP option. If the appropriate support is not present, all files are +treated as plain text. The standard input is always so treated. If a file with +a \fB.gz\fP or \fB.bz2\fP extension is not in fact compressed, it is read as a +plain text file. When input is from a compressed .gz or .bz2 file, the +\fB--line-buffered\fP option is ignored. +. +. +.SH "BINARY FILES" +.rs +.sp +By default, a file that contains a binary zero byte within the first 1024 bytes +is identified as a binary file, and is processed specially. 
However, if the +newline type is specified as NUL, that is, the line terminator is a binary +zero, the test for a binary file is not applied. See the \fB--binary-files\fP +option for a means of changing the way binary files are handled. +. +. +.SH "BINARY ZEROS IN PATTERNS" +.rs +.sp +Patterns passed from the command line are strings that are terminated by a +binary zero, so cannot contain internal zeros. However, patterns that are read +from a file via the \fB-f\fP option may contain binary zeros. +. +. +.SH OPTIONS +.rs +.sp +The order in which some of the options appear can affect the output. For +example, both the \fB-H\fP and \fB-l\fP options affect the printing of file +names. Whichever comes later in the command line will be the one that takes +effect. Similarly, except where noted below, if an option is given twice, the +later setting is used. Numerical values for options may be followed by K or M, +to signify multiplication by 1024 or 1024*1024 respectively. +.TP 10 +\fB--\fP +This terminates the list of options. It is useful if the next item on the +command line starts with a hyphen but is not an option. This allows for the +processing of patterns and file names that start with hyphens. +.TP +\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP +Output up to \fInumber\fP lines of context after each matching line. Fewer +lines are output if the next match or the end of the file is reached, or if the +processing buffer size has been set too small. If file names and/or line +numbers are being output, a hyphen separator is used instead of a colon for the +context lines (the \fB-Z\fP option can be used to change the file name +terminator to a zero byte). A line containing "--" is output between each group +of lines, unless they are in fact contiguous in the input file. The value of +\fInumber\fP is expected to be relatively small. When \fB-c\fP is used, +\fB-A\fP is ignored. +.TP +\fB-a\fP, \fB--text\fP +Treat binary files as text. 
This is equivalent to +\fB--binary-files\fP=\fItext\fP. +.TP +\fB--allow-lookaround-bsk\fP +PCRE2 now forbids the use of \eK in lookarounds by default, in line with Perl. +This option causes \fBpcre2grep\fP to set the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option, which enables this somewhat dangerous usage. +.TP +\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP +Output up to \fInumber\fP lines of context before each matching line. Fewer +lines are output if the previous match or the start of the file is within +\fInumber\fP lines, or if the processing buffer size has been set too small. If +file names and/or line numbers are being output, a hyphen separator is used +instead of a colon for the context lines (the \fB-Z\fP option can be used to +change the file name terminator to a zero byte). A line containing "--" is +output between each group of lines, unless they are in fact contiguous in the +input file. The value of \fInumber\fP is expected to be relatively small. When +\fB-c\fP is used, \fB-B\fP is ignored. +.TP +\fB--binary-files=\fP\fIword\fP +Specify how binary files are to be processed. If the word is "binary" (the +default), pattern matching is performed on binary files, but the only output is +"Binary file <name> matches" when a match succeeds. If the word is "text", +which is equivalent to the \fB-a\fP or \fB--text\fP option, binary files are +processed in the same way as any other file. In this case, when a match +succeeds, the output may be binary garbage, which can have nasty effects if +sent to a terminal. If the word is "without-match", which is equivalent to the +\fB-I\fP option, binary files are not processed at all; they are assumed not to +be of interest and are skipped without causing any output or affecting the +return code. +.TP +\fB--buffer-size=\fP\fInumber\fP +Set the parameter that controls how much memory is obtained at the start of +processing for buffering files that are being scanned. See also +\fB--max-buffer-size\fP below.
+.TP +\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP +Output \fInumber\fP lines of context both before and after each matching line. +This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value. +.TP +\fB-c\fP, \fB--count\fP +Do not output lines from the files that are being scanned; instead output the +number of lines that would have been shown, either because they matched, or, if +\fB-v\fP is set, because they failed to match. By default, this count is +exactly the same as the number of lines that would have been output, but if the +\fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more +suppressed lines than the count (that is, the number of matches). +.sp +If no lines are selected, the number zero is output. If several files are +being scanned, a count is output for each of them and the \fB-t\fP option can +be used to cause a total to be output at the end. However, if the +\fB--files-with-matches\fP option is also used, only those files whose counts +are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP, +\fB-B\fP, and \fB-C\fP options are ignored. +.TP +\fB--colour\fP, \fB--color\fP +If this option is given without any data, it is equivalent to "--colour=auto". +If data is required, it must be given in the same shell item, separated by an +equals sign. +.TP +\fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP +This option specifies under what circumstances the parts of a line that matched +a pattern should be coloured in the output. It is ignored if +\fB--file-offsets\fP, \fB--line-offsets\fP, or \fB--output\fP is set. By +default, output is not coloured. The value for the \fB--colour\fP option (which +is optional, see above) may be "never", "always", or "auto". In the latter +case, colouring happens only if the standard output is connected to a terminal. 
+More resources are used when colouring is enabled, because \fBpcre2grep\fP has +to search for all possible matches in a line, not just one, in order to colour +them all. +.sp +The colour that is used can be specified by setting one of the environment +variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or +PCREGREP_COLOR, which are checked in that order. If none of these are set, +\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value +of the variable should be a string of two numbers, separated by a semicolon, +except in the case of GREP_COLORS, which must start with "ms=" or "mt=" +followed by two semicolon-separated colours, terminated by the end of the +string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is +ignored, and GREP_COLOR is checked. +.sp +If the string obtained from one of the above variables contains any characters +other than semicolon or digits, the setting is ignored and the default colour +is used. The string is copied directly into the control string for setting +colour on a terminal, so it is your responsibility to ensure that the values +make sense. If no relevant environment variable is set, the default is "1;31", +which gives red. +.TP +\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP +If an input path is not a regular file or a directory, "action" specifies how +it is to be processed. Valid values are "read" (the default) or "skip" +(silently skip the path). +.TP +\fB-d\fP \fIaction\fP, \fB--directories=\fP\fIaction\fP +If an input path is a directory, "action" specifies how it is to be processed. +Valid values are "read" (the default in non-Windows environments, for +compatibility with GNU grep), "recurse" (equivalent to the \fB-r\fP option), or +"skip" (silently skip the path, the default in Windows environments). In the +"read" case, directories are read as if they were ordinary files. 
In some +operating systems the effect of reading a directory like this is an immediate +end-of-file; in others it may provoke an error. +.TP +\fB--depth-limit\fP=\fInumber\fP +See \fB--match-limit\fP below. +.TP +\fB-E\fP, \fB--case-restrict\fP +When case distinctions are being ignored in Unicode mode, two ASCII letters (K +and S) will by default match Unicode characters U+212A (Kelvin sign) and U+017F +(long S) respectively, as well as their lower case ASCII counterparts. When +this option is set, case equivalences are restricted such that no ASCII +character matches a non-ASCII character, and vice versa. +.TP +\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP +Specify a pattern to be matched. This option can be used multiple times in +order to specify several patterns. It can also be used as a way of specifying a +single pattern that starts with a hyphen. When \fB-e\fP is used, no argument +pattern is taken from the command line; all arguments are treated as file +names. There is no limit to the number of patterns. They are applied to each +line in the order in which they are defined. +.sp +If \fB-f\fP is used with \fB-e\fP, the command line patterns are matched first, +followed by the patterns from the file(s), independent of the order in which +these options are specified. +.TP +\fB--exclude\fP=\fIpattern\fP +Files (but not directories) whose names match the pattern are skipped without +being processed. This applies to all files, whether listed on the command line, +obtained from \fB--file-list\fP, or by scanning a directory. The pattern is a +PCRE2 regular expression, and is matched against the final component of the +file name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do +not apply to this pattern. The option may be given any number of times in order +to specify multiple patterns. If a file name matches both an \fB--include\fP +and an \fB--exclude\fP pattern, it is excluded. 
There is no short form for this +option. +.TP +\fB--exclude-from=\fP\fIfilename\fP +Treat each non-empty line of the file as the data for an \fB--exclude\fP +option. What constitutes a newline when reading the file is the operating +system's default. The \fB--newline\fP option has no effect on this option. This +option may be given more than once in order to specify a number of files to +read. +.TP +\fB--exclude-dir\fP=\fIpattern\fP +Directories whose names match the pattern are skipped without being processed, +whatever the setting of the \fB--recursive\fP option. This applies to all +directories, whether listed on the command line, obtained from +\fB--file-list\fP, or by scanning a parent directory. The pattern is a PCRE2 +regular expression, and is matched against the final component of the directory +name, not the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not +apply to this pattern. The option may be given any number of times in order to +specify more than one pattern. If a directory matches both \fB--include-dir\fP +and \fB--exclude-dir\fP, it is excluded. There is no short form for this +option. +.TP +\fB-F\fP, \fB--fixed-strings\fP +Interpret each data-matching pattern as a list of fixed strings, separated by +newlines, instead of as a regular expression. What constitutes a newline for +this purpose is controlled by the \fB--newline\fP option. The \fB-w\fP (match +as a word) and \fB-x\fP (match whole line) options can be used with \fB-F\fP. +They apply to each of the fixed strings. A line is selected if any of the fixed +strings are found in it (subject to \fB-w\fP or \fB-x\fP, if present). This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the \fB--include\fP or +\fB--exclude\fP options. +.TP +\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP +Read patterns from the file, one per line. 
As is the case with patterns on the +command line, no delimiters should be used. What constitutes a newline when +reading the file is the operating system's default interpretation of \en. The +\fB--newline\fP option has no effect on this option. Trailing white space is +removed from each line, and blank lines are ignored unless the +\fB--posix-pattern-file\fP option is also provided. An empty file contains no +patterns and therefore matches nothing. Patterns read from a file in this way +may contain binary zeros, which are treated as ordinary character literals. +.sp +If this option is given more than once, all the specified files are read. A +data line is output if any of the patterns match it. A file name can be given +as "-" to refer to the standard input. When \fB-f\fP is used, patterns +specified on the command line using \fB-e\fP may also be present; they are +matched before the file's patterns. However, no pattern is taken from the +command line; all arguments are treated as the names of paths to be searched. +.TP +\fB--file-list\fP=\fIfilename\fP +Read a list of files and/or directories that are to be scanned from the given +file, one per line. What constitutes a newline when reading the file is the +operating system's default. Trailing white space is removed from each line, and +blank lines are ignored. These paths are processed before any that are listed +on the command line. The file name can be given as "-" to refer to the standard +input. If \fB--file\fP and \fB--file-list\fP are both specified as "-", +patterns are read first. This is useful only when the standard input is a +terminal, from which further lines (the list of files) can be read after an +end-of-file indication. If this option is given more than once, all the +specified files are read. +.TP +\fB--file-offsets\fP +Instead of showing lines or parts of lines that match, show each match as an +offset from the start of the file and a length, separated by a comma. 
In this +mode, \fB--colour\fP has no effect, and no context is shown. That is, the +\fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is more than one +match in a line, each of them is shown separately. This option is mutually +exclusive with \fB--output\fP, \fB--line-offsets\fP, and \fB--only-matching\fP. +.TP +\fB--group-separator\fP=\fItext\fP +Output this text string instead of two hyphens between groups of lines when +\fB-A\fP, \fB-B\fP, or \fB-C\fP is in use. See also \fB--no-group-separator\fP. +.TP +\fB-H\fP, \fB--with-filename\fP +Force the inclusion of the file name at the start of output lines when +searching a single file. The file name is not normally shown in this case. +By default, for matching lines, the file name is followed by a colon; for +context lines, a hyphen separator is used. The \fB-Z\fP option can be used to +change the terminator to a zero byte. If a line number is also being output, +it follows the file name. When the \fB-M\fP option causes a pattern to match +more than one line, only the first is preceded by the file name. This option +overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options. +.TP +\fB-h\fP, \fB--no-filename\fP +Suppress the output file names when searching multiple files. File names are +normally shown when multiple files are searched. By default, for matching +lines, the file name is followed by a colon; for context lines, a hyphen +separator is used. The \fB-Z\fP option can be used to change the terminator to +a zero byte. If a line number is also being output, it follows the file name. +This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options. +.TP +\fB--heap-limit\fP=\fInumber\fP +See \fB--match-limit\fP below. +.TP +\fB--help\fP +Output a help message, giving brief details of the command options and file +type support, and then exit. Anything else on the command line is +ignored. +.TP +\fB-I\fP +Ignore binary files. 
This is equivalent to +\fB--binary-files\fP=\fIwithout-match\fP. +.TP +\fB-i\fP, \fB--ignore-case\fP +Ignore upper/lower case distinctions when pattern matching. This applies when +matching path names for inclusion or exclusion as well as when matching lines +in files. +.TP +\fB--include\fP=\fIpattern\fP +If any \fB--include\fP patterns are specified, the only files that are +processed are those whose names match one of the patterns and do not match an +\fB--exclude\fP pattern. This option does not affect directories, but it +applies to all files, whether listed on the command line, obtained from +\fB--file-list\fP, or by scanning a directory. The pattern is a PCRE2 regular +expression, and is matched against the final component of the file name, not +the entire path. The \fB-F\fP, \fB-w\fP, and \fB-x\fP options do not apply to +this pattern. The option may be given any number of times. If a file name +matches both an \fB--include\fP and an \fB--exclude\fP pattern, it is excluded. +There is no short form for this option. +.TP +\fB--include-from=\fP\fIfilename\fP +Treat each non-empty line of the file as the data for an \fB--include\fP +option. What constitutes a newline for this purpose is the operating system's +default. The \fB--newline\fP option has no effect on this option. This option +may be given any number of times; all the files are read. +.TP +\fB--include-dir\fP=\fIpattern\fP +If any \fB--include-dir\fP patterns are specified, the only directories that +are processed are those whose names match one of the patterns and do not match +an \fB--exclude-dir\fP pattern. This applies to all directories, whether listed +on the command line, obtained from \fB--file-list\fP, or by scanning a parent +directory. The pattern is a PCRE2 regular expression, and is matched against +the final component of the directory name, not the entire path. The \fB-F\fP, +\fB-w\fP, and \fB-x\fP options do not apply to this pattern. The option may be +given any number of times. 
If a directory matches both \fB--include-dir\fP and +\fB--exclude-dir\fP, it is excluded. There is no short form for this option. +.TP +\fB-L\fP, \fB--files-without-match\fP +Instead of outputting lines from the files, just output the names of the files +that do not contain any lines that would have been output. Each file name is +output once, on a separate line by default, but if the \fB-Z\fP option is set, +they are separated by zero bytes instead of newlines. This option overrides any +previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options. +.TP +\fB-l\fP, \fB--files-with-matches\fP +Instead of outputting lines from the files, just output the names of the files +containing lines that would have been output. Each file name is output once, on +a separate line, but if the \fB-Z\fP option is set, they are separated by zero +bytes instead of newlines. Searching normally stops as soon as a matching line +is found in a file. However, if the \fB-c\fP (count) option is also used, +matching continues in order to obtain the correct count, and those files that +have at least one match are listed along with their counts. Using this option +with \fB-c\fP is a way of suppressing the listing of files with no matches that +occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP, +\fB-h\fP, or \fB-L\fP options. +.TP +\fB--label\fP=\fIname\fP +This option supplies a name to be used for the standard input when file names +are being output. If not supplied, "(standard input)" is used. There is no +short form for this option. +.TP +\fB--line-buffered\fP +When this option is given, non-compressed input is read and processed line by +line, and the output is flushed after each write. By default, input is read in +large chunks, unless \fBpcre2grep\fP can determine that it is reading from a +terminal, which is currently possible only in Unix-like environments or +Windows. Output to terminal is normally automatically flushed by the operating +system. 
This option can be useful when the input or output is attached to a +pipe and you do not want \fBpcre2grep\fP to buffer up large amounts of data. +However, its use will affect performance, and the \fB-M\fP (multiline) option +ceases to work. When input is from a compressed .gz or .bz2 file, +\fB--line-buffered\fP is ignored. +.TP +\fB--line-offsets\fP +Instead of showing lines or parts of lines that match, show each match as a +line number, the offset from the start of the line, and a length. The line +number is terminated by a colon (as usual; see the \fB-n\fP option), and the +offset and length are separated by a comma. In this mode, \fB--colour\fP has no +effect, and no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP +options are ignored. If there is more than one match in a line, each of them is +shown separately. This option is mutually exclusive with \fB--output\fP, +\fB--file-offsets\fP, and \fB--only-matching\fP. +.TP +\fB--locale\fP=\fIlocale-name\fP +This option specifies a locale to be used for pattern matching. It overrides +the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no +locale is specified, the PCRE2 library's default (usually the "C" locale) is +used. There is no short form for this option. +.TP +\fB-M\fP, \fB--multiline\fP +Allow patterns to match more than one line. When this option is set, the PCRE2 +library is called in "multiline" mode, and a match is allowed to continue past +the end of the initial line and onto one or more subsequent lines. +.sp +Patterns used with \fB-M\fP may usefully contain literal newline characters and +internal occurrences of ^ and $ characters, because in multiline mode these can +match at internal newlines. Because \fBpcre2grep\fP is scanning multiple lines, +the \eZ and \ez assertions match only at the end of the last line in the file. +The \eA assertion matches at the start of the first line of a match. 
This can +be any line in the file; it is not anchored to the first line. +.sp +The output for a successful match may consist of more than one line. The first +line is the line in which the match started, and the last line is the line in +which the match ended. If the matched string ends with a newline sequence, the +output ends at the end of that line. If \fB-v\fP is set, none of the lines in a +multi-line match are output. Once a match has been handled, scanning restarts +at the beginning of the line after the one in which the match ended. +.sp +The newline sequence that separates multiple lines must be matched as part of +the pattern. For example, to find the phrase "regular expression" in a file +where "regular" might be at the end of a line and "expression" at the start of +the next line, you could use this command: +.sp + pcre2grep -M 'regular\es+expression' +.sp +The \es escape sequence matches any white space character, including newlines, +and is followed by + so as to match trailing white space on the first line as +well as possibly handling a two-character newline sequence. +.sp +There is a limit to the number of lines that can be matched, imposed by the way +that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently +large processing buffer, this should not be a problem. +.sp +The \fB-M\fP option does not work when input is read line by line (see +\fB--line-buffered\fP.) +.TP +\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP +Stop processing after finding \fInumber\fP matching lines, or non-matching +lines if \fB-v\fP is also set. Any trailing context lines are output after the +final match. In multiline mode, each multiline match counts as just one line +for this purpose. If this limit is reached when reading the standard input from +a regular file, the file is left positioned just after the last matching line. +If \fB-c\fP is also set, the count that is output is never greater than +\fInumber\fP. 
This option has no effect if used with \fB-L\fP, \fB-l\fP, or +\fB-q\fP, or when just checking for a match in a binary file. +.TP +\fB--match-limit\fP=\fInumber\fP +Processing some regular expression patterns may take a very long time to search +for all possible matching strings. Others may require a very large amount of +memory. There are three options that set resource limits for matching. +.sp +The \fB--match-limit\fP option provides a means of limiting computing resource +usage when processing patterns that are not going to match, but which have a +very large number of possibilities in their search trees. The classic example +is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a +counter that is incremented each time around its main processing loop. If the +value set by \fB--match-limit\fP is reached, an error occurs. +.sp +The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of +1024 bytes), the maximum amount of heap memory that may be used for matching. +.sp +The \fB--depth-limit\fP option limits the depth of nested backtracking points, +which indirectly limits the amount of memory that is used. The amount of memory +needed for each backtracking point depends on the number of capturing +parentheses in the pattern, so the amount of memory that is used before this +limit acts varies from pattern to pattern. This limit is of use only if it is +set smaller than \fB--match-limit\fP. +.sp +There are no short forms for these options. The default limits can be set +when the PCRE2 library is compiled; if they are not specified, the defaults +are very large and so effectively unlimited. +.TP +\fB--max-buffer-size\fP=\fInumber\fP +This limits the expansion of the processing buffer, whose initial size can be +set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no +smaller than the starting buffer size. 
+.TP +\fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP +Six different conventions for indicating the ends of lines in scanned files are +supported. For example: +.sp + pcre2grep -N CRLF 'some pattern' +.sp +The newline type may be specified in upper, lower, or mixed case. If the +newline type is NUL, lines are separated by binary zero characters. The other +types are the single-character sequences CR (carriage return) and LF +(linefeed), the two-character sequence CRLF, an "anycrlf" type, which +recognizes any of the preceding three types, and an "any" type, for which any +Unicode line ending sequence is assumed to end a line. The Unicode sequences +are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed, +U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS +(paragraph separator, U+2029). +.sp +When the PCRE2 library is built, a default line-ending sequence is specified. +This is normally the standard sequence for the operating system. Unless +otherwise specified by this option, \fBpcre2grep\fP uses the library's default. +.sp +This option makes it possible to use \fBpcre2grep\fP to scan files that have +come from other environments without having to modify their line endings. If +the data that is being scanned does not agree with the convention set by this +option, \fBpcre2grep\fP may behave in strange ways. Note that this option does +not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or +\fB--include-from\fP options, which are expected to use the operating system's +standard newline sequence. +.TP +\fB-n\fP, \fB--line-number\fP +Precede each output line by its line number in the file, followed by a colon +for matching lines or a hyphen for context lines. If the file name is also +being output, it precedes the line number. When the \fB-M\fP option causes a +pattern to match more than one line, only the first is preceded by its line +number. This option is forced if \fB--line-offsets\fP is used. 
+.TP +\fB--no-group-separator\fP +Do not output a separator between groups of lines when \fB-A\fP, \fB-B\fP, or +\fB-C\fP is in use. The default is to output a line containing two hyphens. See +also \fB--group-separator\fP. +.TP +\fB--no-jit\fP +If the PCRE2 library is built with support for just-in-time compiling (which +speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it +was explicitly disabled at build time. This option can be used to disable the +use of JIT at run time. It is provided for testing and working around problems. +It should never be needed in normal use. +.TP +\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP +When there is a match, instead of outputting the line that matched, output just +the text specified in this option, followed by an operating-system standard +newline. In this mode, \fB--colour\fP has no effect, and no context is shown. +That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. The +\fB--newline\fP option has no effect on this option, which is mutually +exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and +\fB--line-offsets\fP. However, like \fB--only-matching\fP, if there is more +than one match in a line, each of them causes a line of output. +.sp +Escape sequences starting with a dollar character may be used to insert the +contents of the matched part of the line and/or captured substrings into the +text. +.sp +$<digits> or ${<digits>} is replaced by the captured substring of the given +decimal number; $& (or the legacy $0) substitutes the whole match. If the +number is greater than the number of capturing substrings, or if the capture +is unset, the replacement is empty. +.sp +$a is replaced by bell; $b by backspace; $e by escape; $f by form feed; $n by +newline; $r by carriage return; $t by tab; $v by vertical tab. +.sp +$o<digits> or $o{<digits>} is replaced by the character whose code point is the +given octal number. In the first form, up to three octal digits are processed.
+When more digits are needed in Unicode mode to specify a wide character, the +second form must be used. +.sp +$x<digits> or $x{<digits>} is replaced by the character represented by the +given hexadecimal number. In the first form, up to two hexadecimal digits are +processed. When more digits are needed in Unicode mode to specify a wide +character, the second form must be used. +.sp +Any other character is substituted by itself. In particular, $$ is replaced by +a single dollar. +.TP +\fB-o\fP, \fB--only-matching\fP +Show only the part of the line that matched a pattern instead of the whole +line. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and +\fB-C\fP options are ignored. If there is more than one match in a line, each +of them is shown separately, on a separate line of output. If \fB-o\fP is +combined with \fB-v\fP (invert the sense of the match to find non-matching +lines), no output is generated, but the return code is set appropriately. If +the matched portion of the line is empty, nothing is output unless the file +name or line number are being printed, in which case they are shown on an +otherwise empty line. This option is mutually exclusive with \fB--output\fP, +\fB--file-offsets\fP and \fB--line-offsets\fP. +.TP +\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP +Show only the part of the line that matched the capturing parentheses of the +given number. Up to 50 capturing parentheses are supported by default. This +limit can be changed via the \fB--om-capture\fP option. A pattern may contain +any number of capturing parentheses, but only those whose number is within the +limit can be accessed by \fB-o\fP. An error occurs if the number specified by +\fB-o\fP is greater than the limit. +.sp +-o0 is the same as \fB-o\fP without a number. Because these options can be +given without an argument (see above), if an argument is present, it must be +given in the same shell item, for example, -o3 or --only-matching=2. 
The +comments given for the non-argument case above also apply to this option. If +the specified capturing parentheses do not exist in the pattern, or were not +set in the match, nothing is output unless the file name or line number are +being output. +.sp +If this option is given multiple times, multiple substrings are output for each +match, in the order the options are given, and all on one line. For example, +-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and +then 3 again to be output. By default, there is no separator (but see the next +but one option). +.TP +\fB--om-capture\fP=\fInumber\fP +Set the number of capturing parentheses that can be accessed by \fB-o\fP. The +default is 50. +.TP +\fB--om-separator\fP=\fItext\fP +Specify a separating string for multiple occurrences of \fB-o\fP. The default +is an empty string. Separating strings are never coloured. +.TP +\fB-P\fP, \fB--no-ucp\fP +Starting from release 10.43, when UTF/Unicode mode is specified with \fB-u\fP +or \fB-U\fP, the PCRE2_UCP option is used by default. This means that the +POSIX classes in patterns match more than just ASCII characters. For example, +[:digit:] matches any Unicode decimal digit. The \fB--no-ucp\fP option +suppresses PCRE2_UCP, thus restricting the POSIX classes to ASCII characters, +as was the case in earlier releases. Note that there are now more fine-grained +option settings within patterns that affect individual classes. For example, +when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while +allowing \ew to match Unicode letters and digits. +.TP +\fB--posix-pattern-file\fP +When patterns are provided with the \fB-f\fP option, do not trim trailing +spaces or ignore empty lines in a similar way than other grep tools. 
To keep +the behaviour consistent with older versions, if the pattern read was +terminated with CRLF (as character literals) then both characters won't be +included as part of it, so if you really need to have a pattern ending in '\er', +use an escape sequence or provide it by a different method. +.TP +\fB-q\fP, \fB--quiet\fP +Work quietly, that is, display nothing except error messages. The exit +status indicates whether or not any matches were found. +.TP +\fB-r\fP, \fB--recursive\fP +If any given path is a directory, recursively scan the files it contains, +taking note of any \fB--include\fP and \fB--exclude\fP settings. By default, a +directory is read as a normal file; in some operating systems this gives an +immediate end-of-file. This option is a shorthand for setting the \fB-d\fP +option to "recurse". +.TP +\fB--recursion-limit\fP=\fInumber\fP +This is an obsolete synonym for \fB--depth-limit\fP. See \fB--match-limit\fP +above for details. +.TP +\fB-s\fP, \fB--no-messages\fP +Suppress error messages about non-existent or unreadable files. Such files are +quietly skipped. However, the return code is still 2, even if matches were +found in other files. +.TP +\fB-t\fP, \fB--total-count\fP +This option is useful when scanning more than one file. If used on its own, +\fB-t\fP suppresses all output except for a grand total number of matching +lines (or non-matching lines if \fB-v\fP is used) in all the files. If \fB-t\fP +is used with \fB-c\fP, a grand total is output except when the previous output +is just one line. In other words, it is not output when just one file's count +is listed. If file names are being output, the grand total is preceded by +"TOTAL:". Otherwise, it appears as just another number. The \fB-t\fP option is +ignored when used with \fB-L\fP (list files without matches), because the grand +total would always be zero. +.TP +\fB-u\fP, \fB--utf\fP +Operate in UTF/Unicode mode. 
This option is available only if PCRE2 has been +compiled with UTF-8 support. All patterns (including those for any +\fB--exclude\fP and \fB--include\fP options) and all lines that are scanned +must be valid strings of UTF-8 characters. If an invalid UTF-8 string is +encountered, an error occurs. +.TP +\fB-U\fP, \fB--utf-allow-invalid\fP +As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code +unit sequences. These can never form part of any pattern match. Patterns +themselves, however, must still be valid UTF-8 strings. This facility allows +valid UTF-8 strings to be sought within arbitrary byte sequences in executable +or other binary files. For more details about matching in non-valid UTF-8 +strings, see the +.\" HREF +\fBpcre2unicode\fP(3) +.\" +documentation. +.TP +\fB-V\fP, \fB--version\fP +Write the version numbers of \fBpcre2grep\fP and the PCRE2 library to the +standard output and then exit. Anything else on the command line is +ignored. +.TP +\fB-v\fP, \fB--invert-match\fP +Invert the sense of the match, so that lines which do \fInot\fP match any of +the patterns are the ones that are found. When this option is set, options such +as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match +that are to be output, are ignored. +.TP +\fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP +Force the patterns only to match "words". That is, there must be a word +boundary at the start and end of each matched string. This is equivalent to +having "\eb(?:" at the start of each pattern, and ")\eb" at the end. This +option applies only to the patterns that are matched against the contents of +files; it does not apply to patterns specified by any of the \fB--include\fP or +\fB--exclude\fP options. +.TP +\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP +Force the patterns to start matching only at the beginnings of lines, and in +addition, require them to match entire lines. In multiline mode the match may +be more than one line. 
This is equivalent to having "^(?:" at the start of each +pattern and ")$" at the end. This option applies only to the patterns that are +matched against the contents of files; it does not apply to patterns specified +by any of the \fB--include\fP or \fB--exclude\fP options. +.TP +\fB-Z\fP, \fB--null\fP +Terminate file names in the regular output with a zero byte (the NUL +character) instead of what would normally appear. This is useful when file +names contain unusual characters such as colons, hyphens, or even newlines. The +option does not apply to file names in error messages. +. +. +.SH "ENVIRONMENT VARIABLES" +.rs +.sp +The environment variables \fBLC_ALL\fP and \fBLC_CTYPE\fP are examined, in that +order, for a locale. The first one that is set is used. This can be overridden +by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default +(usually the "C" locale) is used. +. +. +.SH "NEWLINES" +.rs +.sp +The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with +newline conventions that differ from the default. This option affects only the +way scanned files are processed. It does not affect the interpretation of files +specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or +\fB--include-from\fP options. +.P +Any parts of the scanned input files that are written to the standard output +are copied with whatever newline sequences they have in the input. However, if +the final line of a file is output, and it does not end with a newline +sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF +or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a +single NL is used. +.P +The newline setting does not affect the way in which \fBpcre2grep\fP writes +newlines in informational messages to the standard output and error streams. 
+Under Windows, the standard output is set to be binary, so that "\er\en" at the +ends of output lines that are copied from the input is not converted to +"\er\er\en" by the C I/O library. This means that any messages written to the +standard output must end with "\er\en". For all other operating systems, and +for all messages to the standard error stream, "\en" is used. +. +. +.SH "OPTIONS COMPATIBILITY WITH GNU GREP" +.rs +.sp +Many of the short and long forms of \fBpcre2grep\fP's options are the same as +in the GNU \fBgrep\fP program. Any long option of the form \fB--xxx-regexp\fP +(GNU terminology) is also available as \fB--xxx-regex\fP (PCRE2 terminology). +However, the \fB--case-restrict\fP, \fB--depth-limit\fP, \fB-E\fP, +\fB--file-list\fP, \fB--file-offsets\fP, \fB--heap-limit\fP, +\fB--include-dir\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, +\fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--no-ucp\fP, +\fB--om-separator\fP, \fB--output\fP, \fB-P\fP, \fB-u\fP, \fB--utf\fP, +\fB-U\fP, and \fB--utf-allow-invalid\fP options are specific to +\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a +capturing parentheses number. +.P +Although most of the common options work the same way, a few are different in +\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob +for GNU \fBgrep\fP, but in \fBpcre2grep\fP it is a regular expression to which +the \fB-i\fP option applies. If both the \fB-c\fP and \fB-l\fP options are +given, GNU grep lists only file names, without counts, but \fBpcre2grep\fP +gives the counts as well. +. +. +.SH "OPTIONS WITH DATA" +.rs +.sp +There are four different ways in which an option with data can be specified. +If a short form option is used, the data may follow immediately, or (with one +exception) in the next command line item. For example: +.sp + -f/some/file + -f /some/file +.sp +The exception is the \fB-o\fP option, which may appear with or without data. 
+Because of this, if data is present, it must follow immediately in the same +item, for example -o3. +.P +If a long form option is used, the data may appear in the same command line +item, separated by an equals character, or (with two exceptions) it may appear +in the next command line item. For example: +.sp + --file=/some/file + --file /some/file +.sp +Note, however, that if you want to supply a file name beginning with ~ as data +in a shell command, and have the shell expand ~ to a home directory, you must +separate the file name from the option, because the shell does not treat ~ +specially unless it is at the start of an item. +.P +The exceptions to the above are the \fB--colour\fP (or \fB--color\fP) and +\fB--only-matching\fP options, for which the data is optional. If one of these +options does have data, it must be given in the first form, using an equals +character. Otherwise \fBpcre2grep\fP will assume that it has no data. +. +. +.SH "USING PCRE2'S CALLOUT FACILITY" +.rs +.sp +\fBpcre2grep\fP has, by default, support for calling external programs or +scripts or echoing specific strings during matching by making use of PCRE2's +callout facility. However, this support can be completely or partially disabled +when \fBpcre2grep\fP is built. You can find out whether your binary has support +for callouts by running it with the \fB--help\fP option. If callout support is +completely disabled, callouts in patterns are forbidden by \fBpcre2grep\fP. +If the facility is partially disabled, calling external programs is not +supported, and callouts that request it are ignored. +.P +A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is +either a number or a quoted string (see the +.\" HREF +\fBpcre2callout\fP +.\" +documentation for details). Numbered callouts are ignored by \fBpcre2grep\fP; +only callouts with string arguments are useful. +. +. 
+.SS "Echoing a specific string" +.rs +.sp +Starting the callout string with a pipe character invokes an echoing facility +that avoids calling an external program or script. This facility is always +available, provided that callouts were not completely disabled when +\fBpcre2grep\fP was built. The rest of the callout string is processed as a +zero-terminated string, which means it should not contain any internal binary +zeros. It is written to the output, having first been passed through the same +escape processing as text from the \fB--output\fP (\fB-O\fP) option (see +above). However, $0 or $& cannot be used to insert a matched substring because +the match is still in progress. Instead, the single character '0' is inserted. +Any syntax errors in the string (for example, a dollar not followed by another +character) cause the callout to be ignored. No terminator is added to the +output string, so if you want a newline, you must include it explicitly using +the escape $n. For example: +.sp + pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' +.sp +Matching continues normally after the string is output. If you want to see only +the callout output but not any output from an actual match, you should end the +pattern with (*FAIL). +. +. +.SS "Calling external programs or scripts" +.rs +.sp +This facility can be independently disabled when \fBpcre2grep\fP is built. It +is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS, +where \fBlib$spawn()\fP is used, and for any Unix-like environment where +\fBfork()\fP and \fBexecv()\fP are available. +.P +If the callout string does not start with a pipe (vertical bar) character, it +is parsed into a list of substrings separated by pipe characters. The first +substring must be an executable name, with the following substrings specifying +arguments: +.sp + executable_name|arg1|arg2|... +.sp +Any substring (including the executable name) may contain escape sequences +started by a dollar character. 
These are the same as for the \fB--output\fP +(\fB-O\fP) option documented above, except that $0 or $& cannot insert the +matched string because the match is still in progress. Instead, the character +\&'0' is inserted. If you need a literal dollar or pipe character in any +substring, use $$ or $| respectively. Here is an example: +.sp + echo -e "abcde\en12345" | pcre2grep \e + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - +.sp + Output: +.sp + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 +.sp +The parameters for the system call that is used to run the program or script +are zero-terminated strings. This means that binary zero characters in the +callout argument will cause premature termination of their substrings, and +therefore should not be present. Any syntax errors in the string (for example, +a dollar not followed by another character) cause the callout to be ignored. +If running the program fails for any reason (including the non-existence of the +executable), a local matching failure occurs and the matcher backtracks in the +normal way. +. +. +.SH "MATCHING ERRORS" +.rs +.sp +It is possible to supply a regular expression that takes a very long time to +fail to match certain lines. Such patterns normally involve nested indefinite +repeats, for example: (a+)*\ed when matched against a line of a's with no final +digit. The PCRE2 matching function has a resource limit that causes it to abort +in these circumstances. If this happens, \fBpcre2grep\fP outputs an error +message and the line that caused the problem to the standard error stream. If +there are more than 20 such errors, \fBpcre2grep\fP gives up. +.P +The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the +overall resource limit. There are also other limits that affect the amount of +memory used during matching; see the discussion of \fB--heap-limit\fP and +\fB--depth-limit\fP above. +. +. 
+.SH DIAGNOSTICS +.rs +.sp +Exit status is 0 if any matches were found, 1 if no matches were found, and 2 +for syntax errors, overlong lines, non-existent or inaccessible files (even if +matches were found in other files) or too many matching errors. Using the +\fB-s\fP option to suppress error messages about inaccessible files does not +affect the return code. +.P +When run under VMS, the return code is placed in the symbol PCRE2GREP_RC +because VMS does not distinguish between exit(0) and exit(1). +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2pattern\fP(3), \fBpcre2syntax\fP(3), \fBpcre2callout\fP(3), +\fBpcre2unicode\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 04 February 2025 +Copyright (c) 1997-2023 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2grep.txt b/3rd/pcre2/doc/pcre2grep.txt new file mode 100644 index 00000000..9e07a5a7 --- /dev/null +++ b/3rd/pcre2/doc/pcre2grep.txt @@ -0,0 +1,1106 @@ +PCRE2GREP(1) General Commands Manual PCRE2GREP(1) + + +NAME + pcre2grep - a grep with Perl-compatible regular expressions. + + +SYNOPSIS + pcre2grep [options] [long options] [pattern] [path1 path2 ...] + + +DESCRIPTION + + pcre2grep searches files for character patterns, in the same way as + other grep commands do, but it uses the PCRE2 regular expression li- + brary to support patterns that are compatible with the regular expres- + sions of Perl 5. See pcre2syntax(3) for a quick-reference summary of + pattern syntax, or pcre2pattern(3) for a full description of the syntax + and semantics of the regular expressions that PCRE2 supports. + + Patterns, whether supplied on the command line or in a separate file, + are given without delimiters. 
For example: + + pcre2grep Thursday /etc/motd + + If you attempt to use delimiters (for example, by surrounding a pattern + with slashes, as is common in Perl scripts), they are interpreted as + part of the pattern. Quotes can of course be used to delimit patterns + on the command line because they are interpreted by the shell, and in- + deed quotes are required if a pattern contains white space or shell + metacharacters. + + The first argument that follows any option settings is treated as the + single pattern to be matched when neither -e nor -f is present. Con- + versely, when one or both of these options are used to specify pat- + terns, all arguments are treated as path names. At least one of -e, -f, + or an argument pattern must be provided. + + If no files are specified, pcre2grep reads the standard input. The + standard input can also be referenced by a name consisting of a single + hyphen. For example: + + pcre2grep some-pattern file1 - file3 + + By default, input files are searched line by line, so pattern asser- + tions about the beginning and end of a subject string (^, $, \A, \Z, + and \z) match at the beginning and end of each line. When a line + matches a pattern, it is copied to the standard output, and if there is + more than one file, the file name is output at the start of each line, + followed by a colon. However, there are options that can change how + pcre2grep behaves. For example, the -M option makes it possible to + search for strings that span line boundaries. What defines a line + boundary is controlled by the -N (--newline) option. The -h and -H op- + tions control whether or not file names are shown, and the -Z option + changes the file name terminator to a zero byte. + + The amount of memory used for buffering files that are being scanned is + controlled by parameters that can be set by the --buffer-size and + --max-buffer-size options. The first of these sets the size of buffer + that is obtained at the start of processing. 
If an input file contains + very long lines, a larger buffer may be needed; this is handled by au- + tomatically extending the buffer, up to the limit specified by --max- + buffer-size. The default values for these parameters can be set when + pcre2grep is built; if nothing is specified, the defaults are set to + 20KiB and 1MiB respectively. An error occurs if a line is too long and + the buffer can no longer be expanded. + + The block of memory that is actually used is three times the "buffer + size", to allow for buffering "before" and "after" lines. If the buffer + size is too small, fewer than requested "before" and "after" lines may + be output. + + When matching with a multiline pattern, the size of the buffer must be + at least half of the maximum match expected or the pattern might fail + to match. + + Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the + greater. BUFSIZ is defined in <stdio.h>. When there is more than one + pattern (specified by the use of -e and/or -f), each pattern is applied + to each line in the order in which they are defined, except that all + the -e patterns are tried before the -f patterns. + + By default, as soon as one pattern matches a line, no further patterns + are considered. However, if --colour (or --color) is used to colour the + matching substrings, or if --only-matching, --file-offsets, --line-off- + sets, or --output is used to output only the part of the line that + matched (either shown literally, or as an offset), the behaviour is + different. In this situation, all the patterns are applied to the line. + If there is more than one match, the one that begins nearest to the + start of the subject is processed; if there is more than one match at + that position, the one with the longest matching substring is + processed; if the matching substrings are equal, the first match found + is processed. 
+ + Scanning with all the patterns resumes immediately following the match, + so that later matches on the same line can be found. Note, however, + that an overlapping match that starts in the middle of another match + will not be processed. + + The above behaviour was changed at release 10.41 to be more compatible + with GNU grep. In earlier releases, pcre2grep did not recognize matches + from later patterns that were earlier in the subject. + + Patterns that can match an empty string are accepted, but empty string + matches are never recognized. An example is the pattern "(su- + per)?(man)?", in which all components are optional. This pattern finds + all occurrences of both "super" and "man"; the output differs from + matching with "super|man" when only the matching substrings are being + shown. + + If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses + the value to set a locale when calling the PCRE2 library. The --locale + option can be used to override this. + + +SUPPORT FOR COMPRESSED FILES + + Compile-time options for pcre2grep can set it up to use libz or libbz2 + for reading compressed files whose names end in .gz or .bz2, respec- + tively. You can find out whether your pcre2grep binary has support for + one or both of these file types by running it with the --help option. + If the appropriate support is not present, all files are treated as + plain text. The standard input is always so treated. If a file with a + .gz or .bz2 extension is not in fact compressed, it is read as a plain + text file. When input is from a compressed .gz or .bz2 file, the + --line-buffered option is ignored. + + +BINARY FILES + + By default, a file that contains a binary zero byte within the first + 1024 bytes is identified as a binary file, and is processed specially. + However, if the newline type is specified as NUL, that is, the line + terminator is a binary zero, the test for a binary file is not applied. 
+ See the --binary-files option for a means of changing the way binary + files are handled. + + +BINARY ZEROS IN PATTERNS + + Patterns passed from the command line are strings that are terminated + by a binary zero, so cannot contain internal zeros. However, patterns + that are read from a file via the -f option may contain binary zeros. + + +OPTIONS + + The order in which some of the options appear can affect the output. + For example, both the -H and -l options affect the printing of file + names. Whichever comes later in the command line will be the one that + takes effect. Similarly, except where noted below, if an option is + given twice, the later setting is used. Numerical values for options + may be followed by K or M, to signify multiplication by 1024 or + 1024*1024 respectively. + + -- This terminates the list of options. It is useful if the next + item on the command line starts with a hyphen but is not an + option. This allows for the processing of patterns and file + names that start with hyphens. + + -A number, --after-context=number + Output up to number lines of context after each matching + line. Fewer lines are output if the next match or the end of + the file is reached, or if the processing buffer size has + been set too small. If file names and/or line numbers are be- + ing output, a hyphen separator is used instead of a colon for + the context lines (the -Z option can be used to change the + file name terminator to a zero byte). A line containing "--" + is output between each group of lines, unless they are in + fact contiguous in the input file. The value of number is ex- + pected to be relatively small. When -c is used, -A is ig- + nored. + + -a, --text + Treat binary files as text. This is equivalent to --binary- + files=text. + + --allow-lookaround-bsk + PCRE2 now forbids the use of \K in lookarounds by default, in + line with Perl. 
This option causes pcre2grep to set the + PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option, which enables this + somewhat dangerous usage. + + -B number, --before-context=number + Output up to number lines of context before each matching + line. Fewer lines are output if the previous match or the + start of the file is within number lines, or if the process- + ing buffer size has been set too small. If file names and/or + line numbers are being output, a hyphen separator is used in- + stead of a colon for the context lines (the -Z option can be + used to change the file name terminator to a zero byte). A + line containing "--" is output between each group of lines, + unless they are in fact contiguous in the input file. The + value of number is expected to be relatively small. When -c + is used, -B is ignored. + + --binary-files=word + Specify how binary files are to be processed. If the word is + "binary" (the default), pattern matching is performed on bi- + nary files, but the only output is "Binary file <name> + matches" when a match succeeds. If the word is "text", which + is equivalent to the -a or --text option, binary files are + processed in the same way as any other file. In this case, + when a match succeeds, the output may be binary garbage, + which can have nasty effects if sent to a terminal. If the + word is "without-match", which is equivalent to the -I op- + tion, binary files are not processed at all; they are assumed + not to be of interest and are skipped without causing any + output or affecting the return code. + + --buffer-size=number + Set the parameter that controls how much memory is obtained + at the start of processing for buffering files that are being + scanned. See also --max-buffer-size below. + + -C number, --context=number + Output number lines of context both before and after each + matching line. This is equivalent to setting both -A and -B + to the same value. 
+ + -c, --count + Do not output lines from the files that are being scanned; + instead output the number of lines that would have been + shown, either because they matched, or, if -v is set, because + they failed to match. By default, this count is exactly the + same as the number of lines that would have been output, but + if the -M (multiline) option is used (without -v), there may + be more suppressed lines than the count (that is, the number + of matches). + + If no lines are selected, the number zero is output. If sev- + eral files are being scanned, a count is output for each of + them and the -t option can be used to cause a total to be + output at the end. However, if the --files-with-matches op- + tion is also used, only those files whose counts are greater + than zero are listed. When -c is used, the -A, -B, and -C op- + tions are ignored. + + --colour, --color + If this option is given without any data, it is equivalent to + "--colour=auto". If data is required, it must be given in + the same shell item, separated by an equals sign. + + --colour=value, --color=value + This option specifies under what circumstances the parts of a + line that matched a pattern should be coloured in the output. + It is ignored if --file-offsets, --line-offsets, or --output + is set. By default, output is not coloured. The value for the + --colour option (which is optional, see above) may be + "never", "always", or "auto". In the latter case, colouring + happens only if the standard output is connected to a termi- + nal. More resources are used when colouring is enabled, be- + cause pcre2grep has to search for all possible matches in a + line, not just one, in order to colour them all. + + The colour that is used can be specified by setting one of + the environment variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, + PCREGREP_COLOUR, or PCREGREP_COLOR, which are checked in that + order. If none of these are set, pcre2grep looks for + GREP_COLORS or GREP_COLOR (in that order). 
The value of the + variable should be a string of two numbers, separated by a + semicolon, except in the case of GREP_COLORS, which must + start with "ms=" or "mt=" followed by two semicolon-separated + colours, terminated by the end of the string or by a colon. + If GREP_COLORS does not start with "ms=" or "mt=" it is ig- + nored, and GREP_COLOR is checked. + + If the string obtained from one of the above variables con- + tains any characters other than semicolon or digits, the set- + ting is ignored and the default colour is used. The string is + copied directly into the control string for setting colour on + a terminal, so it is your responsibility to ensure that the + values make sense. If no relevant environment variable is + set, the default is "1;31", which gives red. + + -D action, --devices=action + If an input path is not a regular file or a directory, "ac- + tion" specifies how it is to be processed. Valid values are + "read" (the default) or "skip" (silently skip the path). + + -d action, --directories=action + If an input path is a directory, "action" specifies how it is + to be processed. Valid values are "read" (the default in + non-Windows environments, for compatibility with GNU grep), + "recurse" (equivalent to the -r option), or "skip" (silently + skip the path, the default in Windows environments). In the + "read" case, directories are read as if they were ordinary + files. In some operating systems the effect of reading a di- + rectory like this is an immediate end-of-file; in others it + may provoke an error. + + --depth-limit=number + See --match-limit below. + + -E, --case-restrict + When case distinctions are being ignored in Unicode mode, two + ASCII letters (K and S) will by default match Unicode charac- + ters U+212A (Kelvin sign) and U+017F (long S) respectively, + as well as their lower case ASCII counterparts. 
When this op- + tion is set, case equivalences are restricted such that no + ASCII character matches a non-ASCII character, and vice + versa. + + -e pattern, --regex=pattern, --regexp=pattern + Specify a pattern to be matched. This option can be used mul- + tiple times in order to specify several patterns. It can also + be used as a way of specifying a single pattern that starts + with a hyphen. When -e is used, no argument pattern is taken + from the command line; all arguments are treated as file + names. There is no limit to the number of patterns. They are + applied to each line in the order in which they are defined. + + If -f is used with -e, the command line patterns are matched + first, followed by the patterns from the file(s), independent + of the order in which these options are specified. + + --exclude=pattern + Files (but not directories) whose names match the pattern are + skipped without being processed. This applies to all files, + whether listed on the command line, obtained from --file- + list, or by scanning a directory. The pattern is a PCRE2 reg- + ular expression, and is matched against the final component + of the file name, not the entire path. The -F, -w, and -x op- + tions do not apply to this pattern. The option may be given + any number of times in order to specify multiple patterns. If + a file name matches both an --include and an --exclude pat- + tern, it is excluded. There is no short form for this option. + + --exclude-from=filename + Treat each non-empty line of the file as the data for an + --exclude option. What constitutes a newline when reading the + file is the operating system's default. The --newline option + has no effect on this option. This option may be given more + than once in order to specify a number of files to read. + + --exclude-dir=pattern + Directories whose names match the pattern are skipped without + being processed, whatever the setting of the --recursive op- + tion. 
This applies to all directories, whether listed on the + command line, obtained from --file-list, or by scanning a + parent directory. The pattern is a PCRE2 regular expression, + and is matched against the final component of the directory + name, not the entire path. The -F, -w, and -x options do not + apply to this pattern. The option may be given any number of + times in order to specify more than one pattern. If a direc- + tory matches both --include-dir and --exclude-dir, it is ex- + cluded. There is no short form for this option. + + -F, --fixed-strings + Interpret each data-matching pattern as a list of fixed + strings, separated by newlines, instead of as a regular ex- + pression. What constitutes a newline for this purpose is con- + trolled by the --newline option. The -w (match as a word) and + -x (match whole line) options can be used with -F. They ap- + ply to each of the fixed strings. A line is selected if any + of the fixed strings are found in it (subject to -w or -x, if + present). This option applies only to the patterns that are + matched against the contents of files; it does not apply to + patterns specified by any of the --include or --exclude op- + tions. + + -f filename, --file=filename + Read patterns from the file, one per line. As is the case + with patterns on the command line, no delimiters should be + used. What constitutes a newline when reading the file is the + operating system's default interpretation of \n. The --new- + line option has no effect on this option. Trailing white + space is removed from each line, and blank lines are ignored + unless the --posix-pattern-file option is also provided. An + empty file contains no patterns and therefore matches noth- + ing. Patterns read from a file in this way may contain binary + zeros, which are treated as ordinary character literals. + + If this option is given more than once, all the specified + files are read. A data line is output if any of the patterns + match it. 
A file name can be given as "-" to refer to the + standard input. When -f is used, patterns specified on the + command line using -e may also be present; they are matched + before the file's patterns. However, no pattern is taken from + the command line; all arguments are treated as the names of + paths to be searched. + + --file-list=filename + Read a list of files and/or directories that are to be + scanned from the given file, one per line. What constitutes a + newline when reading the file is the operating system's de- + fault. Trailing white space is removed from each line, and + blank lines are ignored. These paths are processed before any + that are listed on the command line. The file name can be + given as "-" to refer to the standard input. If --file and + --file-list are both specified as "-", patterns are read + first. This is useful only when the standard input is a ter- + minal, from which further lines (the list of files) can be + read after an end-of-file indication. If this option is given + more than once, all the specified files are read. + + --file-offsets + Instead of showing lines or parts of lines that match, show + each match as an offset from the start of the file and a + length, separated by a comma. In this mode, --colour has no + effect, and no context is shown. That is, the -A, -B, and -C + options are ignored. If there is more than one match in a + line, each of them is shown separately. This option is mutu- + ally exclusive with --output, --line-offsets, and --only- + matching. + + --group-separator=text + Output this text string instead of two hyphens between groups + of lines when -A, -B, or -C is in use. See also --no-group- + separator. + + -H, --with-filename + Force the inclusion of the file name at the start of output + lines when searching a single file. The file name is not nor- + mally shown in this case. By default, for matching lines, + the file name is followed by a colon; for context lines, a + hyphen separator is used. 
The -Z option can be used to change + the terminator to a zero byte. If a line number is also being + output, it follows the file name. When the -M option causes a + pattern to match more than one line, only the first is pre- + ceded by the file name. This option overrides any previous + -h, -l, or -L options. + + -h, --no-filename + Suppress the output file names when searching multiple files. + File names are normally shown when multiple files are + searched. By default, for matching lines, the file name is + followed by a colon; for context lines, a hyphen separator is + used. The -Z option can be used to change the terminator to a + zero byte. If a line number is also being output, it follows + the file name. This option overrides any previous -H, -L, or + -l options. + + --heap-limit=number + See --match-limit below. + + --help Output a help message, giving brief details of the command + options and file type support, and then exit. Anything else + on the command line is ignored. + + -I Ignore binary files. This is equivalent to --binary- + files=without-match. + + -i, --ignore-case + Ignore upper/lower case distinctions when pattern matching. + This applies when matching path names for inclusion or exclu- + sion as well as when matching lines in files. + + --include=pattern + If any --include patterns are specified, the only files that + are processed are those whose names match one of the patterns + and do not match an --exclude pattern. This option does not + affect directories, but it applies to all files, whether + listed on the command line, obtained from --file-list, or by + scanning a directory. The pattern is a PCRE2 regular expres- + sion, and is matched against the final component of the file + name, not the entire path. The -F, -w, and -x options do not + apply to this pattern. The option may be given any number of + times. If a file name matches both an --include and an --ex- + clude pattern, it is excluded. 
There is no short form for + this option. + + --include-from=filename + Treat each non-empty line of the file as the data for an + --include option. What constitutes a newline for this purpose + is the operating system's default. The --newline option has + no effect on this option. This option may be given any number + of times; all the files are read. + + --include-dir=pattern + If any --include-dir patterns are specified, the only direc- + tories that are processed are those whose names match one of + the patterns and do not match an --exclude-dir pattern. This + applies to all directories, whether listed on the command + line, obtained from --file-list, or by scanning a parent di- + rectory. The pattern is a PCRE2 regular expression, and is + matched against the final component of the directory name, + not the entire path. The -F, -w, and -x options do not apply + to this pattern. The option may be given any number of times. + If a directory matches both --include-dir and --exclude-dir, + it is excluded. There is no short form for this option. + + -L, --files-without-match + Instead of outputting lines from the files, just output the + names of the files that do not contain any lines that would + have been output. Each file name is output once, on a sepa- + rate line by default, but if the -Z option is set, they are + separated by zero bytes instead of newlines. This option + overrides any previous -H, -h, or -l options. + + -l, --files-with-matches + Instead of outputting lines from the files, just output the + names of the files containing lines that would have been out- + put. Each file name is output once, on a separate line, but + if the -Z option is set, they are separated by zero bytes in- + stead of newlines. Searching normally stops as soon as a + matching line is found in a file. 
However, if the -c (count) + option is also used, matching continues in order to obtain + the correct count, and those files that have at least one + match are listed along with their counts. Using this option + with -c is a way of suppressing the listing of files with no + matches that occurs with -c on its own. This option overrides + any previous -H, -h, or -L options. + + --label=name + This option supplies a name to be used for the standard input + when file names are being output. If not supplied, "(standard + input)" is used. There is no short form for this option. + + --line-buffered + When this option is given, non-compressed input is read and + processed line by line, and the output is flushed after each + write. By default, input is read in large chunks, unless + pcre2grep can determine that it is reading from a terminal, + which is currently possible only in Unix-like environments or + Windows. Output to terminal is normally automatically flushed + by the operating system. This option can be useful when the + input or output is attached to a pipe and you do not want + pcre2grep to buffer up large amounts of data. However, its + use will affect performance, and the -M (multiline) option + ceases to work. When input is from a compressed .gz or .bz2 + file, --line-buffered is ignored. + + --line-offsets + Instead of showing lines or parts of lines that match, show + each match as a line number, the offset from the start of the + line, and a length. The line number is terminated by a colon + (as usual; see the -n option), and the offset and length are + separated by a comma. In this mode, --colour has no effect, + and no context is shown. That is, the -A, -B, and -C options + are ignored. If there is more than one match in a line, each + of them is shown separately. This option is mutually exclu- + sive with --output, --file-offsets, and --only-matching. + + --locale=locale-name + This option specifies a locale to be used for pattern match- + ing. 
It overrides the value in the LC_ALL or LC_CTYPE envi- + ronment variables. If no locale is specified, the PCRE2 li- + brary's default (usually the "C" locale) is used. There is no + short form for this option. + + -M, --multiline + Allow patterns to match more than one line. When this option + is set, the PCRE2 library is called in "multiline" mode, and + a match is allowed to continue past the end of the initial + line and onto one or more subsequent lines. + + Patterns used with -M may usefully contain literal newline + characters and internal occurrences of ^ and $ characters, + because in multiline mode these can match at internal new- + lines. Because pcre2grep is scanning multiple lines, the \Z + and \z assertions match only at the end of the last line in + the file. The \A assertion matches at the start of the first + line of a match. This can be any line in the file; it is not + anchored to the first line. + + The output for a successful match may consist of more than + one line. The first line is the line in which the match + started, and the last line is the line in which the match + ended. If the matched string ends with a newline sequence, + the output ends at the end of that line. If -v is set, none + of the lines in a multi-line match are output. Once a match + has been handled, scanning restarts at the beginning of the + line after the one in which the match ended. + + The newline sequence that separates multiple lines must be + matched as part of the pattern. For example, to find the + phrase "regular expression" in a file where "regular" might + be at the end of a line and "expression" at the start of the + next line, you could use this command: + + pcre2grep -M 'regular\s+expression' + + The \s escape sequence matches any white space character, in- + cluding newlines, and is followed by + so as to match trail- + ing white space on the first line as well as possibly han- + dling a two-character newline sequence. 
+ + There is a limit to the number of lines that can be matched, + imposed by the way that pcre2grep buffers the input file as + it scans it. With a sufficiently large processing buffer, + this should not be a problem. + + The -M option does not work when input is read line by line + (see --line-buffered.) + + -m number, --max-count=number + Stop processing after finding number matching lines, or non- + matching lines if -v is also set. Any trailing context lines + are output after the final match. In multiline mode, each + multiline match counts as just one line for this purpose. If + this limit is reached when reading the standard input from a + regular file, the file is left positioned just after the last + matching line. If -c is also set, the count that is output + is never greater than number. This option has no effect if + used with -L, -l, or -q, or when just checking for a match in + a binary file. + + --match-limit=number + Processing some regular expression patterns may take a very + long time to search for all possible matching strings. Others + may require a very large amount of memory. There are three + options that set resource limits for matching. + + The --match-limit option provides a means of limiting comput- + ing resource usage when processing patterns that are not go- + ing to match, but which have a very large number of possibil- + ities in their search trees. The classic example is a pattern + that uses nested unlimited repeats. Internally, PCRE2 has a + counter that is incremented each time around its main pro- + cessing loop. If the value set by --match-limit is reached, + an error occurs. + + The --heap-limit option specifies, as a number of kibibytes + (units of 1024 bytes), the maximum amount of heap memory that + may be used for matching. + + The --depth-limit option limits the depth of nested back- + tracking points, which indirectly limits the amount of memory + that is used. 
The amount of memory needed for each backtrack- + ing point depends on the number of capturing parentheses in + the pattern, so the amount of memory that is used before this + limit acts varies from pattern to pattern. This limit is of + use only if it is set smaller than --match-limit. + + There are no short forms for these options. The default lim- + its can be set when the PCRE2 library is compiled; if they + are not specified, the defaults are very large and so effec- + tively unlimited. + + --max-buffer-size=number + This limits the expansion of the processing buffer, whose + initial size can be set by --buffer-size. The maximum buffer + size is silently forced to be no smaller than the starting + buffer size. + + -N newline-type, --newline=newline-type + Six different conventions for indicating the ends of lines in + scanned files are supported. For example: + + pcre2grep -N CRLF 'some pattern' + + The newline type may be specified in upper, lower, or mixed + case. If the newline type is NUL, lines are separated by bi- + nary zero characters. The other types are the single-charac- + ter sequences CR (carriage return) and LF (linefeed), the + two-character sequence CRLF, an "anycrlf" type, which recog- + nizes any of the preceding three types, and an "any" type, + for which any Unicode line ending sequence is assumed to end + a line. The Unicode sequences are the three just mentioned, + plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL + (next line, U+0085), LS (line separator, U+2028), and PS + (paragraph separator, U+2029). + + When the PCRE2 library is built, a default line-ending se- + quence is specified. This is normally the standard sequence + for the operating system. Unless otherwise specified by this + option, pcre2grep uses the library's default. + + This option makes it possible to use pcre2grep to scan files + that have come from other environments without having to mod- + ify their line endings. 
If the data that is being scanned + does not agree with the convention set by this option, + pcre2grep may behave in strange ways. Note that this option + does not apply to files specified by the -f, --exclude-from, + or --include-from options, which are expected to use the op- + erating system's standard newline sequence. + + -n, --line-number + Precede each output line by its line number in the file, fol- + lowed by a colon for matching lines or a hyphen for context + lines. If the file name is also being output, it precedes the + line number. When the -M option causes a pattern to match + more than one line, only the first is preceded by its line + number. This option is forced if --line-offsets is used. + + --no-group-separator + Do not output a separator between groups of lines when -A, + -B, or -C is in use. The default is to output a line contain- + ing two hyphens. See also --group-separator. + + --no-jit If the PCRE2 library is built with support for just-in-time + compiling (which speeds up matching), pcre2grep automatically + makes use of this, unless it was explicitly disabled at build + time. This option can be used to disable the use of JIT at + run time. It is provided for testing and working around prob- + lems. It should never be needed in normal use. + + -O text, --output=text + When there is a match, instead of outputting the line that + matched, output just the text specified in this option, fol- + lowed by an operating-system standard newline. In this mode, + --colour has no effect, and no context is shown. That is, + the -A, -B, and -C options are ignored. The --newline option + has no effect on this option, which is mutually exclusive + with --only-matching, --file-offsets, and --line-offsets. + However, like --only-matching, if there is more than one + match in a line, each of them causes a line of output. 
+ + Escape sequences starting with a dollar character may be used + to insert the contents of the matched part of the line and/or + captured substrings into the text. + + $<digits> or ${<digits>} is replaced by the captured sub- + string of the given decimal number; $& (or the legacy $0) + substitutes the whole match. If the number is greater than + the number of capturing substrings, or if the capture is un- + set, the replacement is empty. + + $a is replaced by bell; $b by backspace; $e by escape; $f by + form feed; $n by newline; $r by carriage return; $t by tab; + $v by vertical tab. + + $o<digits> or $o{<digits>} is replaced by the character whose + code point is the given octal number. In the first form, up + to three octal digits are processed. When more digits are + needed in Unicode mode to specify a wide character, the sec- + ond form must be used. + + $x<digits> or $x{<digits>} is replaced by the character rep- + resented by the given hexadecimal number. In the first form, + up to two hexadecimal digits are processed. When more digits + are needed in Unicode mode to specify a wide character, the + second form must be used. + + Any other character is substituted by itself. In particular, + $$ is replaced by a single dollar. + + -o, --only-matching + Show only the part of the line that matched a pattern instead + of the whole line. In this mode, no context is shown. That + is, the -A, -B, and -C options are ignored. If there is more + than one match in a line, each of them is shown separately, + on a separate line of output. If -o is combined with -v (in- + vert the sense of the match to find non-matching lines), no + output is generated, but the return code is set appropri- + ately. If the matched portion of the line is empty, nothing + is output unless the file name or line number are being + printed, in which case they are shown on an otherwise empty + line. This option is mutually exclusive with --output, + --file-offsets and --line-offsets. 
+ + -onumber, --only-matching=number + Show only the part of the line that matched the capturing + parentheses of the given number. Up to 50 capturing parenthe- + ses are supported by default. This limit can be changed via + the --om-capture option. A pattern may contain any number of + capturing parentheses, but only those whose number is within + the limit can be accessed by -o. An error occurs if the num- + ber specified by -o is greater than the limit. + + -o0 is the same as -o without a number. Because these options + can be given without an argument (see above), if an argument + is present, it must be given in the same shell item, for ex- + ample, -o3 or --only-matching=2. The comments given for the + non-argument case above also apply to this option. If the + specified capturing parentheses do not exist in the pattern, + or were not set in the match, nothing is output unless the + file name or line number are being output. + + If this option is given multiple times, multiple substrings + are output for each match, in the order the options are + given, and all on one line. For example, -o3 -o1 -o3 causes + the substrings matched by capturing parentheses 3 and 1 and + then 3 again to be output. By default, there is no separator + (but see the next but one option). + + --om-capture=number + Set the number of capturing parentheses that can be accessed + by -o. The default is 50. + + --om-separator=text + Specify a separating string for multiple occurrences of -o. + The default is an empty string. Separating strings are never + coloured. + + -P, --no-ucp + Starting from release 10.43, when UTF/Unicode mode is speci- + fied with -u or -U, the PCRE2_UCP option is used by default. + This means that the POSIX classes in patterns match more than + just ASCII characters. For example, [:digit:] matches any + Unicode decimal digit. The --no-ucp option suppresses + PCRE2_UCP, thus restricting the POSIX classes to ASCII char- + acters, as was the case in earlier releases. 
Note that there + are now more fine-grained option settings within patterns + that affect individual classes. For example, when in UCP + mode, the sequence (?aP) restricts [:word:] to ASCII letters, + while allowing \w to match Unicode letters and digits. + + --posix-pattern-file + When patterns are provided with the -f option, do not trim + trailing spaces or ignore empty lines, in a similar way to + other grep tools. To keep the behaviour consistent with older + versions, if the pattern read was terminated with CRLF (as + character literals) then both characters won't be included as + part of it, so if you really need to have a pattern ending in + '\r', use an escape sequence or provide it by a different + method. + + -q, --quiet + Work quietly, that is, display nothing except error messages. + The exit status indicates whether or not any matches were + found. + + -r, --recursive + If any given path is a directory, recursively scan the files + it contains, taking note of any --include and --exclude set- + tings. By default, a directory is read as a normal file; in + some operating systems this gives an immediate end-of-file. + This option is a shorthand for setting the -d option to "re- + curse". + + --recursion-limit=number + This is an obsolete synonym for --depth-limit. See --match- + limit above for details. + + -s, --no-messages + Suppress error messages about non-existent or unreadable + files. Such files are quietly skipped. However, the return + code is still 2, even if matches were found in other files. + + -t, --total-count + This option is useful when scanning more than one file. If + used on its own, -t suppresses all output except for a grand + total number of matching lines (or non-matching lines if -v + is used) in all the files. If -t is used with -c, a grand to- + tal is output except when the previous output is just one + line. In other words, it is not output when just one file's + count is listed. 
If file names are being output, the grand + total is preceded by "TOTAL:". Otherwise, it appears as just + another number. The -t option is ignored when used with -L + (list files without matches), because the grand total would + always be zero. + + -u, --utf Operate in UTF/Unicode mode. This option is available only if + PCRE2 has been compiled with UTF-8 support. All patterns (in- + cluding those for any --exclude and --include options) and + all lines that are scanned must be valid strings of UTF-8 + characters. If an invalid UTF-8 string is encountered, an er- + ror occurs. + + -U, --utf-allow-invalid + As --utf, but in addition subject lines may contain invalid + UTF-8 code unit sequences. These can never form part of any + pattern match. Patterns themselves, however, must still be + valid UTF-8 strings. This facility allows valid UTF-8 strings + to be sought within arbitrary byte sequences in executable or + other binary files. For more details about matching in non- + valid UTF-8 strings, see the pcre2unicode(3) documentation. + + -V, --version + Write the version numbers of pcre2grep and the PCRE2 library + to the standard output and then exit. Anything else on the + command line is ignored. + + -v, --invert-match + Invert the sense of the match, so that lines which do not + match any of the patterns are the ones that are found. When + this option is set, options such as --only-matching and + --output, which specify parts of a match that are to be out- + put, are ignored. + + -w, --word-regex, --word-regexp + Force the patterns only to match "words". That is, there must + be a word boundary at the start and end of each matched + string. This is equivalent to having "\b(?:" at the start of + each pattern, and ")\b" at the end. This option applies only + to the patterns that are matched against the contents of + files; it does not apply to patterns specified by any of the + --include or --exclude options. 
+ + -x, --line-regex, --line-regexp + Force the patterns to start matching only at the beginnings + of lines, and in addition, require them to match entire + lines. In multiline mode the match may be more than one line. + This is equivalent to having "^(?:" at the start of each pat- + tern and ")$" at the end. This option applies only to the + patterns that are matched against the contents of files; it + does not apply to patterns specified by any of the --include + or --exclude options. + + -Z, --null + Terminate file names in the regular output with a zero byte + (the NUL character) instead of what would normally appear. + This is useful when file names contain unusual characters + such as colons, hyphens, or even newlines. The option does + not apply to file names in error messages. + + +ENVIRONMENT VARIABLES + + The environment variables LC_ALL and LC_CTYPE are examined, in that or- + der, for a locale. The first one that is set is used. This can be over- + ridden by the --locale option. If no locale is set, the PCRE2 library's + default (usually the "C" locale) is used. + + +NEWLINES + + The -N (--newline) option allows pcre2grep to scan files with newline + conventions that differ from the default. This option affects only the + way scanned files are processed. It does not affect the interpretation + of files specified by the -f, --file-list, --exclude-from, or --in- + clude-from options. + + Any parts of the scanned input files that are written to the standard + output are copied with whatever newline sequences they have in the in- + put. However, if the final line of a file is output, and it does not + end with a newline sequence, a newline sequence is added. If the new- + line setting is CR, LF, CRLF or NUL, that line ending is output; for + the other settings (ANYCRLF or ANY) a single NL is used. + + The newline setting does not affect the way in which pcre2grep writes + newlines in informational messages to the standard output and error + streams. 
Under Windows, the standard output is set to be binary, so + that "\r\n" at the ends of output lines that are copied from the input + is not converted to "\r\r\n" by the C I/O library. This means that any + messages written to the standard output must end with "\r\n". For all + other operating systems, and for all messages to the standard error + stream, "\n" is used. + + +OPTIONS COMPATIBILITY WITH GNU GREP + + Many of the short and long forms of pcre2grep's options are the same as + in the GNU grep program. Any long option of the form --xxx-regexp (GNU + terminology) is also available as --xxx-regex (PCRE2 terminology). + However, the --case-restrict, --depth-limit, -E, --file-list, --file- + offsets, --heap-limit, --include-dir, --line-offsets, --locale, + --match-limit, -M, --multiline, -N, --newline, --no-ucp, --om-separa- + tor, --output, -P, -u, --utf, -U, and --utf-allow-invalid options are + specific to pcre2grep, as is the use of the --only-matching option with + a capturing parentheses number. + + Although most of the common options work the same way, a few are dif- + ferent in pcre2grep. For example, the --include option's argument is a + glob for GNU grep, but in pcre2grep it is a regular expression to which + the -i option applies. If both the -c and -l options are given, GNU + grep lists only file names, without counts, but pcre2grep gives the + counts as well. + + +OPTIONS WITH DATA + + There are four different ways in which an option with data can be spec- + ified. If a short form option is used, the data may follow immedi- + ately, or (with one exception) in the next command line item. For exam- + ple: + + -f/some/file + -f /some/file + + The exception is the -o option, which may appear with or without data. + Because of this, if data is present, it must follow immediately in the + same item, for example -o3. 
+ + If a long form option is used, the data may appear in the same command + line item, separated by an equals character, or (with two exceptions) + it may appear in the next command line item. For example: + + --file=/some/file + --file /some/file + + Note, however, that if you want to supply a file name beginning with ~ + as data in a shell command, and have the shell expand ~ to a home di- + rectory, you must separate the file name from the option, because the + shell does not treat ~ specially unless it is at the start of an item. + + The exceptions to the above are the --colour (or --color) and --only- + matching options, for which the data is optional. If one of these op- + tions does have data, it must be given in the first form, using an + equals character. Otherwise pcre2grep will assume that it has no data. + + +USING PCRE2'S CALLOUT FACILITY + + pcre2grep has, by default, support for calling external programs or + scripts or echoing specific strings during matching by making use of + PCRE2's callout facility. However, this support can be completely or + partially disabled when pcre2grep is built. You can find out whether + your binary has support for callouts by running it with the --help op- + tion. If callout support is completely disabled, callouts in patterns + are forbidden by pcre2grep. If the facility is partially disabled, + calling external programs is not supported, and callouts that request + it are ignored. + + A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu- + ment is either a number or a quoted string (see the pcre2callout docu- + mentation for details). Numbered callouts are ignored by pcre2grep; + only callouts with string arguments are useful. + + Echoing a specific string + + Starting the callout string with a pipe character invokes an echoing + facility that avoids calling an external program or script. This facil- + ity is always available, provided that callouts were not completely + disabled when pcre2grep was built. 
The rest of the callout string is + processed as a zero-terminated string, which means it should not con- + tain any internal binary zeros. It is written to the output, having + first been passed through the same escape processing as text from the + --output (-O) option (see above). However, $0 or $& cannot be used to + insert a matched substring because the match is still in progress. In- + stead, the single character '0' is inserted. Any syntax errors in the + string (for example, a dollar not followed by another character) cause + the callout to be ignored. No terminator is added to the output string, + so if you want a newline, you must include it explicitly using the es- + cape $n. For example: + + pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file> + + Matching continues normally after the string is output. If you want to + see only the callout output but not any output from an actual match, + you should end the pattern with (*FAIL). + + Calling external programs or scripts + + This facility can be independently disabled when pcre2grep is built. It + is supported for Windows, where a call to _spawnvp() is used, for VMS, + where lib$spawn() is used, and for any Unix-like environment where + fork() and execv() are available. + + If the callout string does not start with a pipe (vertical bar) charac- + ter, it is parsed into a list of substrings separated by pipe charac- + ters. The first substring must be an executable name, with the follow- + ing substrings specifying arguments: + + executable_name|arg1|arg2|... + + Any substring (including the executable name) may contain escape se- + quences started by a dollar character. These are the same as for the + --output (-O) option documented above, except that $0 or $& cannot in- + sert the matched string because the match is still in progress. In- + stead, the character '0' is inserted. If you need a literal dollar or + pipe character in any substring, use $$ or $| respectively.
Here is an + example: + + echo -e "abcde\n12345" | pcre2grep \ + '(?x)(.)(..(.)) + (?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' - + + Output: + + Arg1: [a] [bcd] [d] Arg2: |a| () + abcde + Arg1: [1] [234] [4] Arg2: |1| () + 12345 + + The parameters for the system call that is used to run the program or + script are zero-terminated strings. This means that binary zero charac- + ters in the callout argument will cause premature termination of their + substrings, and therefore should not be present. Any syntax errors in + the string (for example, a dollar not followed by another character) + causes the callout to be ignored. If running the program fails for any + reason (including the non-existence of the executable), a local match- + ing failure occurs and the matcher backtracks in the normal way. + + +MATCHING ERRORS + + It is possible to supply a regular expression that takes a very long + time to fail to match certain lines. Such patterns normally involve + nested indefinite repeats, for example: (a+)*\d when matched against a + line of a's with no final digit. The PCRE2 matching function has a re- + source limit that causes it to abort in these circumstances. If this + happens, pcre2grep outputs an error message and the line that caused + the problem to the standard error stream. If there are more than 20 + such errors, pcre2grep gives up. + + The --match-limit option of pcre2grep can be used to set the overall + resource limit. There are also other limits that affect the amount of + memory used during matching; see the discussion of --heap-limit and + --depth-limit above. + + +DIAGNOSTICS + + Exit status is 0 if any matches were found, 1 if no matches were found, + and 2 for syntax errors, overlong lines, non-existent or inaccessible + files (even if matches were found in other files) or too many matching + errors. Using the -s option to suppress error messages about inaccessi- + ble files does not affect the return code. 
+ + When run under VMS, the return code is placed in the symbol + PCRE2GREP_RC because VMS does not distinguish between exit(0) and + exit(1). + + +SEE ALSO + + pcre2pattern(3), pcre2syntax(3), pcre2callout(3), pcre2unicode(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 04 February 2025 + Copyright (c) 1997-2023 University of Cambridge. + + +PCRE2 10.45 04 February 2025 PCRE2GREP(1) diff --git a/3rd/pcre2/doc/pcre2jit.3 b/3rd/pcre2/doc/pcre2jit.3 new file mode 100644 index 00000000..7567839c --- /dev/null +++ b/3rd/pcre2/doc/pcre2jit.3 @@ -0,0 +1,489 @@ +.TH PCRE2JIT 3 "22 August 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" +.rs +.sp +Just-in-time compiling is a heavyweight optimization that can greatly speed up +pattern matching. However, it comes at the cost of extra processing before the +match is performed, so it is of most benefit when the same pattern is going to +be matched many times. This does not necessarily mean many calls of a matching +function; if the pattern is not anchored, matching attempts may take place many +times at various positions in the subject, even for a single call. Therefore, +if the subject string is very long, it may still pay to use JIT even for +one-off matches. JIT support is available for all of the 8-bit, 16-bit and +32-bit PCRE2 libraries. +.P +JIT support applies only to the traditional Perl-compatible matching function. +It does not apply when the DFA matching function is being used. The code for +JIT support was written by Zoltan Herczeg. +. +. +.SH "AVAILABILITY OF JIT SUPPORT" +.rs +.sp +JIT support is an optional feature of PCRE2. The "configure" option +--enable-jit (or equivalent CMake option) must be set when PCRE2 is built if +you want to use JIT. 
The support is limited to the following hardware +platforms: +.sp + ARM 32-bit (v7, and Thumb2) + ARM 64-bit + IBM s390x 64 bit + Intel x86 32-bit and 64-bit + LoongArch 64 bit + MIPS 32-bit and 64-bit + Power PC 32-bit and 64-bit + RISC-V 32-bit and 64-bit +.sp +If --enable-jit is set on an unsupported platform, compilation fails. +.P +A client program can tell if JIT support has been compiled by calling +\fBpcre2_config()\fP with the PCRE2_CONFIG_JIT option. The result is one if +PCRE2 was built with JIT support, and zero otherwise. However, having the JIT +code available does not guarantee that it will be used for any particular +match. One reason for this is that there are a number of options and pattern +items that are +.\" HTML +.\" +not supported by JIT +.\" +(see below). Another reason is that in some environments JIT is unable to get +executable memory in which to build its compiled code. The only guarantee from +\fBpcre2_config()\fP is that if it returns zero, JIT will definitely \fInot\fP +be used. +.P +As of release 10.45 there is a more informative way to test for JIT support. If +\fBpcre2_jit_compile()\fP is called with the single option PCRE2_JIT_TEST_ALLOC +it returns zero if JIT is available and has a working allocator. Otherwise it +returns PCRE2_ERROR_NOMEMORY if JIT is available but cannot allocate executable +memory, or PCRE2_ERROR_JIT_UNSUPPORTED if JIT support is not compiled. The +code argument is ignored, so it can be a NULL value. +.P +A simple program does not need to check availability in order to use JIT when +possible. The API is implemented in a way that falls back to the interpretive +code if JIT is not available or cannot be used for a given match. For programs +that need the best possible performance, there is a +.\" HTML +.\" +"fast path" +.\" +API that is JIT-specific. +. +.
+.SH "SIMPLE USE OF JIT" +.rs +.sp +To make use of the JIT support in the simplest way, all you have to do is to +call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with +\fBpcre2_compile()\fP. This function has two arguments: the first is the +compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the +second is zero or more of the following option bits: PCRE2_JIT_COMPLETE, +PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. +.P +If JIT support is not available, a call to \fBpcre2_jit_compile()\fP does +nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern +is passed to the JIT compiler, which turns it into machine code that executes +much faster than the normal interpretive code, but yields exactly the same +results. The returned value from \fBpcre2_jit_compile()\fP is zero on success, +or a negative error code. +.P +There is a limit to the size of pattern that JIT supports, imposed by the size +of machine stack that it uses. The exact rules are not documented because they +may change at any time, in particular, when new optimizations are introduced. +If a pattern is too big, a call to \fBpcre2_jit_compile()\fP returns +PCRE2_ERROR_NOMEMORY. +.P +PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete +matches. If you want to run partial matches using the PCRE2_PARTIAL_HARD or +PCRE2_PARTIAL_SOFT options of \fBpcre2_match()\fP, you should set one or both +of the other options as well as, or instead of PCRE2_JIT_COMPLETE. The JIT +compiler generates different optimized code for each of the three modes +(normal, soft partial, hard partial). When \fBpcre2_match()\fP is called, the +appropriate code is run if it is available. Otherwise, the pattern is matched +using interpretive code. +.P +You can call \fBpcre2_jit_compile()\fP multiple times for the same compiled +pattern. It does nothing if it has previously compiled code for any of the +option bits. 
For example, you can call it once with PCRE2_JIT_COMPLETE and +(perhaps later, when you find you need partial matching) again with +PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_HARD. This time it will ignore +PCRE2_JIT_COMPLETE and just compile code for partial matching. If +\fBpcre2_jit_compile()\fP is called with no option bits set, it immediately +returns zero. This is an alternative way of testing whether JIT support has +been compiled. +.P +At present, it is not possible to free JIT compiled code except when the entire +compiled pattern is freed by calling \fBpcre2_code_free()\fP. +.P +In some circumstances you may need to call additional functions. These are +described in the section entitled +.\" HTML +.\" +"Controlling the JIT stack" +.\" +below. +.P +There are some \fBpcre2_match()\fP options that are not supported by JIT, and +there are also some pattern items that JIT cannot handle. Details are given +.\" HTML +.\" +below. +.\" +In both cases, matching automatically falls back to the interpretive code. If +you want to know whether JIT was actually used for a particular match, you +should arrange for a JIT callback function to be set up as described in the +section entitled +.\" HTML +.\" +"Controlling the JIT stack" +.\" +below, even if you do not need to supply a non-default JIT stack. Such a +callback function is called whenever JIT code is about to be obeyed. If the +match-time options are not right for JIT execution, the callback function is +not obeyed. +.P +If the JIT compiler finds an unsupported item, no JIT data is generated. You +can find out if JIT compilation was successful for a compiled pattern by +calling \fBpcre2_pattern_info()\fP with the PCRE2_INFO_JITSIZE option. A +non-zero result means that JIT compilation was successful. A result of 0 means +that JIT support is not available, or the pattern was not processed by +\fBpcre2_jit_compile()\fP, or the JIT compiler was not able to handle the +pattern. 
Successful JIT compilation does not, however, guarantee the use of JIT +at match time because there are some match time options that are not supported +by JIT. +. +. +.SH "MATCHING SUBJECTS CONTAINING INVALID UTF" +.rs +.sp +When a pattern is compiled with the PCRE2_UTF option, subject strings are +normally expected to be a valid sequence of UTF code units. By default, this is +checked at the start of matching and an error is generated if invalid UTF is +detected. The PCRE2_NO_UTF_CHECK option can be passed to \fBpcre2_match()\fP to +skip the check (for improved performance) if you are sure that a subject string +is valid. If this option is used with an invalid string, the result is +undefined. The calling program may crash or loop or otherwise misbehave. +.P +However, a way of running matches on strings that may contain invalid UTF +sequences is available. Calling \fBpcre2_compile()\fP with the +PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in +\fBpcre2_match()\fP to support invalid UTF, and, if \fBpcre2_jit_compile()\fP +is subsequently called, the compiled JIT code also supports invalid UTF. +Details of how this support works, in both the JIT and the interpretive cases, +are given in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.P +There is also an obsolete option for \fBpcre2_jit_compile()\fP called +PCRE2_JIT_INVALID_UTF, which currently exists only for backward compatibility. +It is superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF +and should no longer be used. It may be removed in future. +. +. +.\" HTML +.SH "UNSUPPORTED OPTIONS AND PATTERN ITEMS" +.rs +.sp +The \fBpcre2_match()\fP options that are supported for JIT matching are +PCRE2_COPY_MATCHED_SUBJECT, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, +PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and +PCRE2_PARTIAL_SOFT. The PCRE2_ANCHORED and PCRE2_ENDANCHORED options are not +supported at match time.
+.P +If the PCRE2_NO_JIT option is passed to \fBpcre2_match()\fP it disables the +use of JIT, forcing matching by the interpreter code. +.P +The only unsupported pattern items are \eC (match a single data unit) when +running in a UTF mode, and a callout immediately before an assertion condition +in a conditional group. +. +. +.SH "RETURN VALUES FROM JIT MATCHING" +.rs +.sp +When a pattern is matched using JIT, the return values are the same as those +given by the interpretive \fBpcre2_match()\fP code, with the addition of one +new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory used for +the JIT stack was insufficient. See +.\" HTML +.\" +"Controlling the JIT stack" +.\" +below for a discussion of JIT stack usage. +.P +The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if searching +a very large pattern tree goes on for too long, as it is in the same +circumstance when JIT is not used, but the details of exactly what is counted +are not the same. The PCRE2_ERROR_DEPTHLIMIT error code is never returned +when JIT matching is used. +. +. +.\" HTML +.SH "CONTROLLING THE JIT STACK" +.rs +.sp +When the compiled JIT code runs, it needs a block of memory to use as a stack. +By default, it uses 32KiB on the machine stack. However, some large or +complicated patterns need more than this. The error PCRE2_ERROR_JIT_STACKLIMIT +is given when there is not enough stack. Three functions are provided for +managing blocks of memory for use as JIT stacks. There is further discussion +about the use of JIT stacks in the section entitled +.\" HTML +.\" +"JIT stack FAQ" +.\" +below. +.P +The \fBpcre2_jit_stack_create()\fP function creates a JIT stack. Its arguments +are a starting size, a maximum size, and a general context (for memory +allocation functions, or NULL for standard memory allocation). It returns a +pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there +is an error. 
The \fBpcre2_jit_stack_free()\fP function is used to free a stack +that is no longer needed. If its argument is NULL, this function returns +immediately, without doing anything. (For the technically minded: the address +space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to +1MiB should be more than enough for any pattern. +.P +The \fBpcre2_jit_stack_assign()\fP function specifies which stack JIT code +should use. Its arguments are as follows: +.sp + pcre2_match_context *mcontext + pcre2_jit_callback callback + void *data +.sp +The first argument is a pointer to a match context. When this is subsequently +passed to a matching function, its information determines which JIT stack is +used. If this argument is NULL, the function returns immediately, without doing +anything. There are three cases for the values of the other two options: +.sp + (1) If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32KiB block + on the machine stack is used. This is the default when a match + context is created. +.sp + (2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be + a pointer to a valid JIT stack, the result of calling + \fBpcre2_jit_stack_create()\fP. +.sp + (3) If \fIcallback\fP is not NULL, it must point to a function that is + called with \fIdata\fP as an argument at the start of matching, in + order to set up a JIT stack. If the return from the callback + function is NULL, the internal 32KiB stack is used; otherwise the + return value must be a valid JIT stack, the result of calling + \fBpcre2_jit_stack_create()\fP. +.sp +A callback function is obeyed whenever JIT code is about to be run; it is not +obeyed when \fBpcre2_match()\fP is called with options that are incompatible +for JIT matching. A callback function can therefore be used to determine +whether a match operation was executed by JIT or by the interpreter. 
+.P +You may safely use the same JIT stack for more than one pattern (either by +assigning directly or by callback), as long as the patterns are matched +sequentially in the same thread. Currently, the only way to set up +non-sequential matches in one thread is to use callouts: if a callout function +starts another match, that match must use a different JIT stack to the one used +for currently suspended match(es). +.P +In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. +.P +Strictly speaking, even more is allowed. You can assign the same non-NULL stack +to a match context that is used by any number of patterns, as long as they are +not used for matching by multiple threads at the same time. For example, you +could use the same stack in all compiled patterns, with a global mutex in the +callback to wait until the stack is available for use. However, this is an +inefficient solution, and not recommended. +.P +This is a suggestion for how a multithreaded program that needs to set up +non-default JIT stacks might operate: +.sp + During thread initialization + thread_local_var = pcre2_jit_stack_create(...) +.sp + During thread exit + pcre2_jit_stack_free(thread_local_var) +.sp + Use a one-line callback function + return thread_local_var +.sp +All the functions described in this section do nothing if JIT is not available. +. +. +.\" HTML +.SH "JIT STACK FAQ" +.rs +.sp +(1) Why do we need JIT stacks? +.sp +PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack where +the local data of the current node is pushed before checking its child nodes. +Allocating real machine stack on some platforms is difficult. 
For example, the +stack chain needs to be updated every time if we extend the stack on PowerPC. +Although it is possible, its updating time overhead decreases performance. So +we do the recursion in memory. +.P +(2) Why don't we simply allocate blocks of memory with \fBmalloc()\fP? +.sp +Modern operating systems have a nice feature: they can reserve an address space +instead of allocating memory. We can safely allocate memory pages inside this +address space, so the stack could grow without moving memory data (this is +important because of pointers). Thus we can allocate 1MiB address space, and +use only a single memory page (usually 4KiB) if that is enough. However, we can +still grow up to 1MiB anytime if needed. +.P +(3) Who "owns" a JIT stack? +.sp +The owner of the stack is the user program, not the JIT studied pattern or +anything else. The user program must ensure that if a stack is being used by +\fBpcre2_match()\fP, (that is, it is assigned to a match context that is passed +to the pattern currently running), that stack must not be used by any other +threads (to avoid overwriting the same memory area). The best practice for +multithreaded programs is to allocate a stack for each thread, and return this +stack through the JIT callback function. +.P +(4) When should a JIT stack be freed? +.sp +You can free a JIT stack at any time, as long as it will not be used by +\fBpcre2_match()\fP again. When you assign the stack to a match context, only a +pointer is set. There is no reference counting or any other magic. You can free +compiled patterns, contexts, and stacks in any order, anytime. +Just \fIdo not\fP call \fBpcre2_match()\fP with a match context pointing to an +already freed stack, as that will cause SEGFAULT. (Also, do not free a stack +currently used by \fBpcre2_match()\fP in another thread). You can also replace +the stack in a context at any time when it is not in use. You should free the +previous stack before assigning a replacement. 
+.P +(5) Should I allocate/free a stack every time before/after calling +\fBpcre2_match()\fP? +.sp +No, because this is too costly in terms of resources. However, you could +implement some clever idea which releases the stack if it has not been used for, +let's say, two minutes. The JIT callback can help to achieve this without keeping a +list of patterns. +.P +(6) OK, the stack is for long term memory allocation. But what happens if a +pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the +stack is freed? +.sp +Especially on embedded systems, it might be a good idea to release memory +sometimes without freeing the stack. There is no API for this at the moment. +Probably a function call which returns with the currently allocated memory for +any stack and another which allows releasing memory (shrinking the stack) would +be a good idea if someone needs this. +.P +(7) This is too much of a headache. Isn't there any better solution for JIT +stack handling? +.sp +No, thanks to Windows. If POSIX threads were used everywhere, we could throw +out this complicated API. +. +. +.SH "FREEING JIT SPECULATIVE MEMORY" +.rs +.sp +.nf +.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); +.fi +.P +The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve +allocation speed. However, in low memory conditions, it might be better to free +all possible memory. You can cause this to happen by calling +pcre2_jit_free_unused_memory(). Its argument is a general context, for custom +memory management, or NULL for standard memory management. +. +. +.SH "EXAMPLE CODE" +.rs +.sp +This is a single-threaded example that specifies a JIT stack without using a +callback. A real program should include error checking after all the function +calls.
+.sp + int rc; + pcre2_code *re; + pcre2_match_data *match_data; + pcre2_match_context *mcontext; + pcre2_jit_stack *jit_stack; +.sp + re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, + &errornumber, &erroffset, NULL); + rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); + mcontext = pcre2_match_context_create(NULL); + jit_stack = pcre2_jit_stack_create(32*1024, 512*1024, NULL); + pcre2_jit_stack_assign(mcontext, NULL, jit_stack); + match_data = pcre2_match_data_create(re, 10); + rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext); + /* Process result */ +.sp + pcre2_code_free(re); + pcre2_match_data_free(match_data); + pcre2_match_context_free(mcontext); + pcre2_jit_stack_free(jit_stack); +.sp +. +. +.\" HTML +.SH "JIT FAST PATH API" +.rs +.sp +Because the API described above falls back to interpreted matching when JIT is +not available, it is convenient for programs that are written for general use +in many environments. However, calling JIT via \fBpcre2_match()\fP does have a +performance impact. Programs that are written for use where JIT is known to be +available, and which need the best possible performance, can instead use a +"fast path" API to call JIT matching directly instead of calling +\fBpcre2_match()\fP (obviously only for patterns that have been successfully +processed by \fBpcre2_jit_compile()\fP). +.P +The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly +the same arguments as \fBpcre2_match()\fP. However, the subject string must be +specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported +option bits (for example, PCRE2_ANCHORED and PCRE2_ENDANCHORED) are ignored, as +is the PCRE2_NO_JIT option. The return values are also the same as for +\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial +or complete) is requested that was not compiled. 
+.P +When you call \fBpcre2_match()\fP, as well as testing for invalid options, a +number of other sanity checks are performed on the arguments. For example, if +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path. If invalid UTF data is passed when PCRE2_MATCH_INVALID_UTF was not +set for \fBpcre2_compile()\fP, the result is undefined. The program may crash +or loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you +should call \fBpcre2_jit_match()\fP in UTF mode only if you are sure the +subject is valid. +.P +Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give +speedups of more than 10%. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2api\fP(3), \fBpcre2unicode\fP(3) +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel (FAQ by Zoltan Herczeg) +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 22 August 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2limits.3 b/3rd/pcre2/doc/pcre2limits.3 new file mode 100644 index 00000000..d21def4e --- /dev/null +++ b/3rd/pcre2/doc/pcre2limits.3 @@ -0,0 +1,81 @@ +.TH PCRE2LIMITS 3 "16 August 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "SIZE AND OTHER LIMITATIONS" +.rs +.sp +There are some size limitations in PCRE2 but it is hoped that they will never +in practice be relevant. +.P +The maximum size of a compiled pattern is approximately 64 thousand code units +for the 8-bit and 16-bit libraries if PCRE2 is compiled with the default +internal linkage size, which is 2 bytes for these libraries. 
If you want to +process regular expressions that are truly enormous, you can compile PCRE2 with +an internal linkage size of 3 or 4 (when building the 16-bit library, 3 is +rounded up to 4). See the \fBREADME\fP file in the source distribution and the +.\" HREF +\fBpcre2build\fP +.\" +documentation for details. In these cases the limit is substantially larger. +However, the speed of execution is slower. In the 32-bit library, the internal +linkage size is always 4. +.P +The maximum length of a source pattern string is essentially unlimited; it is +the largest number a PCRE2_SIZE variable can hold. However, the program that +calls \fBpcre2_compile()\fP can specify a smaller limit. +.P +The maximum length (in code units) of a subject string is one less than the +largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned +integer type, usually defined as size_t. Its maximum value (that is +~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated strings +and unset offsets. +.P +All values in repeating quantifiers must be less than 65536. +.P +There are two different limits that apply to branches of lookbehind assertions. +If every branch in such an assertion matches a fixed number of characters, +the maximum length of any branch is 65535 characters. If any branch matches a +variable number of characters, then the maximum matching length for every +branch is limited. The default limit is set at compile time, defaulting to 255, +but can be changed by the calling program. +.P +There is no limit to the number of parenthesized groups, but there can be no +more than 65535 capture groups, and there is a limit to the depth of nesting of +parenthesized subpatterns of all kinds. This is imposed in order to limit the +amount of system stack used at compile time. The default limit can be specified +when PCRE2 is built; if not, the default is set to 250. 
An application can +change this limit by calling pcre2_set_parens_nest_limit() to set the limit in +a compile context. +.P +The maximum length of a name for a named capture group is 32 code units, and the +maximum number of such groups is 10000. +.P +The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb +is 255 code units for the 8-bit library and 65535 code units for the 16-bit and +32-bit libraries. +.P +The maximum length of a string argument to a callout is the largest number a +32-bit unsigned integer can hold. +.P +The maximum amount of heap memory used for matching is controlled by the heap +limit, which can be set in a pattern or in a match context. The default is a +very large number, effectively unlimited. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 16 August 2023 +Copyright (c) 1997-2023 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2matching.3 b/3rd/pcre2/doc/pcre2matching.3 new file mode 100644 index 00000000..7a203e94 --- /dev/null +++ b/3rd/pcre2/doc/pcre2matching.3 @@ -0,0 +1,228 @@ +.TH PCRE2MATCHING 3 "30 August 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 MATCHING ALGORITHMS" +.rs +.sp +This document describes the two different algorithms that are available in +PCRE2 for matching a compiled regular expression against a given subject +string. The "standard" algorithm is the one provided by the \fBpcre2_match()\fP +function. This works in the same way as Perl's matching function, and provides a +Perl-compatible matching operation. The just-in-time (JIT) optimization that is +described in the +.\" HREF +\fBpcre2jit\fP +.\" +documentation is compatible with this function. +.P +An alternative algorithm is provided by the \fBpcre2_dfa_match()\fP function; +it operates in a different way, and is not Perl-compatible.
This alternative +has advantages and disadvantages compared with the standard algorithm, and +these are described below. +.P +When there is only one possible way in which a given subject string can match a +pattern, the two algorithms give the same answer. A difference arises, however, +when there are multiple possibilities. For example, if the anchored pattern +.sp + ^<.*> +.sp +is matched against the string +.sp + <something> <something else> <something further> +.sp +there are three possible answers. The standard algorithm finds only one of +them, whereas the alternative algorithm finds all three. +. +. +.SH "REGULAR EXPRESSIONS AS TREES" +.rs +.sp +The set of strings that are matched by a regular expression can be represented +as a tree structure. An unlimited repetition in the pattern makes the tree of +infinite size, but it is still a tree. Matching the pattern to a given subject +string (from a given starting point) can be thought of as a search of the tree. +There are two ways to search a tree: depth-first and breadth-first, and these +correspond to the two matching algorithms provided by PCRE2. +. +. +.SH "THE STANDARD MATCHING ALGORITHM" +.rs +.sp +In the terminology of Jeffrey Friedl's book "Mastering Regular Expressions", +the standard algorithm is an "NFA algorithm". It conducts a depth-first search +of the pattern tree. That is, it proceeds along a single path through the tree, +checking that the subject matches what is required. When there is a mismatch, +the algorithm tries any alternatives at the current point, and if they all +fail, it backs up to the previous branch point in the tree, and tries the next +alternative branch at that level. This often involves backing up (moving to the +left) in the subject string as well. The order in which repetition branches are +tried is controlled by the greedy or ungreedy nature of the quantifier. +.P +If a leaf node is reached, a matching string has been found, and at that point +the algorithm stops.
Thus, if there is more than one possible match, this +algorithm returns the first one that it finds. Whether this is the shortest, +the longest, or some intermediate length depends on the way the alternations +and the greedy or ungreedy repetition quantifiers are specified in the +pattern. +.P +Because it ends up with a single path through the tree, it is relatively +straightforward for this algorithm to keep track of the substrings that are +matched by portions of the pattern in parentheses. This provides support for +capturing parentheses and backreferences. +. +. +.SH "THE ALTERNATIVE MATCHING ALGORITHM" +.rs +.sp +This algorithm conducts a breadth-first search of the tree. Starting from the +first matching point in the subject, it scans the subject string from left to +right, once, character by character, and as it does this, it remembers all the +paths through the tree that represent valid matches. In Friedl's terminology, +this is a kind of "DFA algorithm", though it is not implemented as a +traditional finite state machine (it keeps multiple states active +simultaneously). +.P +Although the general principle of this matching algorithm is that it scans the +subject string only once, without backtracking, there is one exception: when a +lookaround assertion is encountered, the characters following or preceding the +current point have to be independently inspected. +.P +The scan continues until either the end of the subject is reached, or there are +no more unterminated paths. At this point, terminated paths represent the +different matching possibilities (if there are none, the match has failed). +Thus, if there is more than one possible match, this algorithm finds all of +them, and in particular, it finds the longest. The matches are returned in +the output vector in decreasing order of length. There is an option to stop the +algorithm after the first match (which is necessarily the shortest) is found. 
+.P +Note that the size of vector needed to contain all the results depends on the +number of simultaneous matches, not on the number of capturing parentheses in +the pattern. Using \fBpcre2_match_data_create_from_pattern()\fP to create the +match data block is therefore not advisable when doing DFA matching. +.P +Note also that all the matches that are found start at the same point in the +subject. If the pattern +.sp + cat(er(pillar)?)? +.sp +is matched against the string "the caterpillar catchment", the result is the +three strings "caterpillar", "cater", and "cat" that start at the fifth +character of the subject. The algorithm does not automatically move on to find +matches that start at later positions. +.P +PCRE2's "auto-possessification" optimization usually applies to character +repeats at the end of a pattern (as well as internally). For example, the +pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point +even considering the possibility of backtracking into the repeated digits. For +DFA matching, this means that only one possible match is found. If you really +do want multiple matches in such cases, either use an ungreedy repeat +("a\ed+?") or set the PCRE2_NO_AUTO_POSSESS option when compiling. +.P +There are a number of features of PCRE2 regular expressions that are not +supported or behave differently in the alternative matching function. Those +that are not supported cause an error if encountered. +.P +1. Because the algorithm finds all possible matches, the greedy or ungreedy +nature of repetition quantifiers is not relevant (though it may affect +auto-possessification, as just described). During matching, greedy and ungreedy +quantifiers are treated in exactly the same way. However, possessive +quantifiers can make a difference when what follows could also match what is +quantified, for example in a pattern like this: +.sp + ^a++\ew! +.sp +This pattern matches "aaab!" 
but not "aaa!", which would be matched by a +non-possessive quantifier. Similarly, if an atomic group is present, it is +matched as if it were a standalone pattern at the current point, and the +longest match is then "locked in" for the rest of the overall pattern. +.P +2. When dealing with multiple paths through the tree simultaneously, it is not +straightforward to keep track of captured substrings for the different matching +possibilities, and PCRE2's implementation of this algorithm does not attempt to +do this. This means that no captured substrings are available. +.P +3. Because no substrings are captured, a number of related features are not +available: +.sp +(a) Backreferences; +.sp +(b) Conditional expressions that use a backreference as the condition or test +for a specific group recursion; +.sp +(c) Script runs; +.sp +(d) Scan substring assertions. +.P +4. Because many paths through the tree may be active, the \eK escape sequence, +which resets the start of the match when encountered (but may be on some paths +and not on others), is not supported. +.P +5. Callouts are supported, but the value of the \fIcapture_top\fP field is +always 1, and the value of the \fIcapture_last\fP field is always 0. +.P +6. The \eC escape sequence, which (in the standard algorithm) always matches a +single code unit, even in a UTF mode, is not supported in UTF modes because +the alternative algorithm moves through the subject string one character (not +code unit) at a time, for all active paths through the tree. +.P +7. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not +supported. (*FAIL) is supported, and behaves like a failing negative assertion. +.P +8. The PCRE2_MATCH_INVALID_UTF option for \fBpcre2_compile()\fP is not +supported by \fBpcre2_dfa_match()\fP. +. +. 
+.SH "ADVANTAGES OF THE ALTERNATIVE ALGORITHM" +.rs +.sp +The main advantage of the alternative algorithm is that all possible matches +(at a single point in the subject) are automatically found, and in particular, +the longest match is found. To find more than one match at the same point using +the standard algorithm, you have to do kludgy things with callouts. +.P +Partial matching is possible with this algorithm, though it has some +limitations. The +.\" HREF +\fBpcre2partial\fP +.\" +documentation gives details of partial matching and discusses multi-segment +matching. +. +. +.SH "DISADVANTAGES OF THE ALTERNATIVE ALGORITHM" +.rs +.sp +The alternative algorithm suffers from a number of disadvantages: +.P +1. It is substantially slower than the standard algorithm. This is partly +because it has to search for all possible matches, but is also because it is +less susceptible to optimization. +.P +2. Capturing parentheses and other features such as backreferences that rely on +them are not supported. +.P +3. Matching within invalid UTF strings is not supported. +.P +4. Although atomic groups are supported, their use does not provide the +performance advantage that it does for the standard algorithm. +.P +5. JIT optimization is not supported. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 30 August 2024 +Copyright (c) 1997-2024 University of Cambridge. 
+.fi diff --git a/3rd/pcre2/doc/pcre2partial.3 b/3rd/pcre2/doc/pcre2partial.3 new file mode 100644 index 00000000..744f0064 --- /dev/null +++ b/3rd/pcre2/doc/pcre2partial.3 @@ -0,0 +1,373 @@ +.TH PCRE2PARTIAL 3 "27 November 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PARTIAL MATCHING IN PCRE2" +.rs +.sp +In normal use of PCRE2, if there is a match up to the end of a subject string, +but more characters are needed to match the entire pattern, PCRE2_ERROR_NOMATCH +is returned, just like any other failing match. There are circumstances where +it might be helpful to distinguish this "partial match" case. +.P +One example is an application where the subject string is very long, and not +all available at once. The requirement here is to be able to do the matching +segment by segment, but special action is needed when a matched substring spans +the boundary between two segments. +.P +Another example is checking a user input string as it is typed, to ensure that +it conforms to a required format. Invalid characters can be immediately +diagnosed and rejected, giving instant feedback. +.P +Partial matching is a PCRE2-specific feature; it is not Perl-compatible. It is +requested by setting one of the PCRE2_PARTIAL_HARD or PCRE2_PARTIAL_SOFT +options when calling a matching function. The difference between the two +options is whether or not a partial match is preferred to an alternative +complete match, though the details differ between the two types of matching +function. If both options are set, PCRE2_PARTIAL_HARD takes precedence. +.P +If you want to use partial matching with just-in-time optimized code, as well +as setting a partial match option for the matching function, you must also call +\fBpcre2_jit_compile()\fP with one or both of these options: +.sp + PCRE2_JIT_PARTIAL_HARD + PCRE2_JIT_PARTIAL_SOFT +.sp +PCRE2_JIT_COMPLETE should also be set if you are going to run non-partial +matches on the same pattern. 
Separate code is compiled for each mode. If the +appropriate JIT mode has not been compiled, interpretive matching code is used. +.P +Setting a partial matching option disables two of PCRE2's standard +optimization hints. PCRE2 remembers the last literal code unit in a pattern, +and abandons matching immediately if it is not present in the subject string. +This optimization cannot be used for a subject string that might match only +partially. PCRE2 also remembers a minimum length of a matching string, and does +not bother to run the matching function on shorter strings. This optimization +is also disabled for partial matching. +. +. +.SH "REQUIREMENTS FOR A PARTIAL MATCH" +.rs +.sp +A possible partial match occurs during matching when the end of the subject +string is reached successfully, but either more characters are needed to +complete the match, or the addition of more characters might change what is +matched. +.P +Example 1: if the pattern is /abc/ and the subject is "ab", more characters are +definitely needed to complete a match. In this case both hard and soft matching +options yield a partial match. +.P +Example 2: if the pattern is /ab+/ and the subject is "ab", a complete match +can be found, but the addition of more characters might change what is +matched. In this case, only PCRE2_PARTIAL_HARD returns a partial match; +PCRE2_PARTIAL_SOFT returns the complete match. +.P +On reaching the end of the subject, when PCRE2_PARTIAL_HARD is set, if the next +pattern item is \ez, \eZ, \eb, \eB, or $ there is always a partial match. +Otherwise, for both options, the next pattern item must be one that inspects a +character, and at least one of the following must be true: +.P +(1) At least one character has already been inspected. An inspected character +need not form part of the final matched string; lookbehind assertions and the +\eK escape sequence provide ways of inspecting characters before the start of a +matched string. 
+.P +(2) The pattern contains one or more lookbehind assertions. This condition +exists in case there is a lookbehind that inspects characters before the start +of the match. +.P +(3) There is a special case when the whole pattern can match an empty string. +When the starting point is at the end of the subject, the empty string match is +a possibility, and if PCRE2_PARTIAL_SOFT is set and neither of the above +conditions is true, it is returned. However, because adding more characters +might result in a non-empty match, PCRE2_PARTIAL_HARD returns a partial match, +which in this case means "there is going to be a match at this point, but until +some more characters are added, we do not know if it will be an empty string or +something longer". +. +. +. +.SH "PARTIAL MATCHING USING pcre2_match()" +.rs +.sp +When a partial matching option is set, the result of calling +\fBpcre2_match()\fP can be one of the following: +.TP 2 +\fBA successful match\fP +A complete match has been found, starting and ending within this subject. +.TP +\fBPCRE2_ERROR_NOMATCH\fP +No match can start anywhere in this subject. +.TP +\fBPCRE2_ERROR_PARTIAL\fP +Adding more characters may result in a complete match that uses one or more +characters from the end of this subject. +.P +When a partial match is returned, the first two elements in the ovector point +to the portion of the subject that was matched, but the values in the rest of +the ovector are undefined. The appearance of \eK in the pattern has no effect +for a partial match. Consider this pattern: +.sp + /abc\eK123/ +.sp +If it is matched against "456abc123xyz" the result is a complete match, and the +ovector defines the matched string as "123", because \eK resets the "start of +match" point. However, if a partial match is requested and the subject string +is "456abc12", a partial match is found for the string "abc12", because all +these characters are needed for a subsequent re-match with additional +characters. 
+.P +If there is more than one partial match, the first one that was found provides +the data that is returned. Consider this pattern: +.sp + /123\ew+X|dogY/ +.sp +If this is matched against the subject string "abc123dog", both alternatives +fail to match, but the end of the subject is reached during matching, so +PCRE2_ERROR_PARTIAL is returned. The offsets are set to 3 and 9, identifying +"123dog" as the first partial match. (In this example, there are two partial +matches, because "dog" on its own partially matches the second alternative.) +. +. +.SS "How a partial match is processed by pcre2_match()" +.rs +.sp +What happens when a partial match is identified depends on which of the two +partial matching options is set. +.P +If PCRE2_PARTIAL_HARD is set, PCRE2_ERROR_PARTIAL is returned as soon as a +partial match is found, without continuing to search for possible complete +matches. This option is "hard" because it prefers an earlier partial match over +a later complete match. For this reason, the assumption is made that the end of +the supplied subject string is not the true end of the available data, which is +why \ez, \eZ, \eb, \eB, and $ always give a partial match. +.P +If PCRE2_PARTIAL_SOFT is set, the partial match is remembered, but matching +continues as normal, and other alternatives in the pattern are tried. If no +complete match can be found, PCRE2_ERROR_PARTIAL is returned instead of +PCRE2_ERROR_NOMATCH. This option is "soft" because it prefers a complete match +over a partial match. All the various matching items in a pattern behave as if +the subject string is potentially complete; \ez, \eZ, and $ match at the end of +the subject, as normal, and for \eb and \eB the end of the subject is treated +as a non-alphanumeric. 
+.P +The difference between the two partial matching options can be illustrated by a +pattern such as: +.sp + /dog(sbody)?/ +.sp +This matches either "dog" or "dogsbody", greedily (that is, it prefers the +longer string if possible). If it is matched against the string "dog" with +PCRE2_PARTIAL_SOFT, it yields a complete match for "dog". However, if +PCRE2_PARTIAL_HARD is set, the result is PCRE2_ERROR_PARTIAL. On the other +hand, if the pattern is made ungreedy the result is different: +.sp + /dog(sbody)??/ +.sp +In this case the result is always a complete match because that is found first, +and matching never continues after finding a complete match. It might be easier +to follow this explanation by thinking of the two patterns like this: +.sp + /dog(sbody)?/ is the same as /dogsbody|dog/ + /dog(sbody)??/ is the same as /dog|dogsbody/ +.sp +The second pattern will never match "dogsbody", because it will always find the +shorter match first. +. +. +.SS "Example of partial matching using pcre2test" +.rs +.sp +The \fBpcre2test\fP data modifiers \fBpartial_hard\fP (or \fBph\fP) and +\fBpartial_soft\fP (or \fBps\fP) set PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT, +respectively, when calling \fBpcre2_match()\fP. Here is a run of +\fBpcre2test\fP using a pattern that matches the whole subject in the form of a +date: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 25dec3\e=ph + Partial match: 25dec3 + data> 3ju\e=ph + Partial match: 3ju + data> 3juj\e=ph + No match +.sp +This example gives the same results for both hard and soft partial matching +options. Here is an example where there is a difference: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 25jun04\e=ps + 0: 25jun04 + 1: jun + data> 25jun04\e=ph + Partial match: 25jun04 +.sp +With PCRE2_PARTIAL_SOFT, the subject is matched completely. 
For +PCRE2_PARTIAL_HARD, however, the subject is assumed not to be complete, so +there is only a partial match. +. +. +. +.SH "MULTI-SEGMENT MATCHING WITH pcre2_match()" +.rs +.sp +PCRE was not originally designed with multi-segment matching in mind. However, +over time, features (including partial matching) that make multi-segment +matching possible have been added. A very long string can be searched segment +by segment by calling \fBpcre2_match()\fP repeatedly, with the aim of achieving +the same results that would happen if the entire string was available for +searching all the time. Normally, the strings that are being sought are much +shorter than each individual segment, and are in the middle of very long +strings, so the pattern is normally not anchored. +.P +Special logic must be implemented to handle a matched substring that spans a +segment boundary. PCRE2_PARTIAL_HARD should be used, because it returns a +partial match at the end of a segment whenever there is the possibility of +changing the match by adding more characters. The PCRE2_NOTBOL option should +also be set for all but the first segment. +.P +When a partial match occurs, the next segment must be added to the current +subject and the match re-run, using the \fIstartoffset\fP argument of +\fBpcre2_match()\fP to begin at the point where the partial match started. +For example: +.sp + re> /\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed/ + data> ...the date is 23ja\e=ph + Partial match: 23ja + data> ...the date is 23jan19 and on that day...\e=offset=15 + 0: 23jan19 + 1: jan +.sp +Note the use of the \fBoffset\fP modifier to start the new match where the +partial match was found. In this example, the next segment was added to the one +in which the partial match was found. This is the most straightforward +approach, typically using a memory buffer that is twice the size of each +segment. 
After a partial match, the first half of the buffer is discarded, the +second half is moved to the start of the buffer, and a new segment is added +before repeating the match as in the example above. After a no match, the +entire buffer can be discarded. +.P +If there are memory constraints, you may want to discard text that precedes a +partial match before adding the next segment. Unfortunately, this is not at +present straightforward. In cases such as the above, where the pattern does not +contain any lookbehinds, it is sufficient to retain only the partially matched +substring. However, if the pattern contains a lookbehind assertion, characters +that precede the start of the partial match may have been inspected during the +matching process. When \fBpcre2test\fP displays a partial match, it indicates +these characters with '<' if the \fBallusedtext\fP modifier is set: +.sp + re> "(?<=123)abc" + data> xx123ab\e=ph,allusedtext + Partial match: 123ab + <<< +.sp +However, the \fBallusedtext\fP modifier is not available for JIT matching, +because JIT matching does not record the first (or last) consulted characters. +For this reason, this information is not available via the API. It is therefore +not possible in general to obtain the exact number of characters that must be +retained in order to get the right match result. If you cannot retain the +entire segment, you must find some heuristic way of choosing. +.P +If you know the approximate length of the matching substrings, you can use that +to decide how much text to retain. The only lookbehind information that is +currently available via the API is the length of the longest individual +lookbehind in a pattern, but this can be misleading if there are nested +lookbehinds. The value returned by calling \fBpcre2_pattern_info()\fP with the +PCRE2_INFO_MAXLOOKBEHIND option is the maximum number of characters (not code +units) that any individual lookbehind moves back when it is processed. A +pattern such as "(?<=(? 
/^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 23ja\e=dfa,ps + Partial match: 23ja + data> n05\e=dfa,dfa_restart + 0: n05 +.sp +The first call has "23ja" as the subject, and requests partial matching; the +second call has "n05" as the subject for the continued (restarted) match. +Notice that when the match is complete, only the last part is shown; PCRE2 does +not retain the previously partially-matched string. It is up to the calling +program to do that if it needs to. This means that, for an unanchored pattern, +if a continued match fails, it is not possible to try again at a new starting +point. All this facility is capable of doing is continuing with the previous +match attempt. For example, consider this pattern: +.sp + 1234|3789 +.sp +If the first part of the subject is "ABC123", a partial match of the first +alternative is found at offset 3. There is no partial match for the second +alternative, because such a match does not start at the same point in the +subject string. Attempting to continue with the string "7890" does not yield a +match because only those alternatives that match at one point in the subject +are remembered. Depending on the application, this may or may not be what you +want. +.P +If you do want to allow for starting again at the next character, one way of +doing it is to retain some or all of the segment and try a new complete match, +as described for \fBpcre2_match()\fP above. Another possibility is to work with +two buffers. If a partial match at offset \fIn\fP in the first buffer is +followed by "no match" when PCRE2_DFA_RESTART is used on the second buffer, you +can then try a new match starting at offset \fIn+1\fP in the first buffer. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 November 2024 +Copyright (c) 1997-2019 University of Cambridge. 
+.fi diff --git a/3rd/pcre2/doc/pcre2pattern.3 b/3rd/pcre2/doc/pcre2pattern.3 new file mode 100644 index 00000000..583222ab --- /dev/null +++ b/3rd/pcre2/doc/pcre2pattern.3 @@ -0,0 +1,4191 @@ +.TH PCRE2PATTERN 3 "27 November 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 REGULAR EXPRESSION DETAILS" +.rs +.sp +The syntax and semantics of the regular expressions that are supported by PCRE2 +are described in detail below. There is a quick-reference syntax summary in the +.\" HREF +\fBpcre2syntax\fP +.\" +page. PCRE2 tries to match Perl syntax and semantics as closely as it can. +PCRE2 also supports some alternative regular expression syntax that does not +conflict with the Perl syntax in order to provide some compatibility with +regular expressions in Python, .NET, and Oniguruma. There are in addition some +options that enable alternative syntax and semantics that are not the same as +in Perl. +.P +Perl's regular expressions are described in its own documentation, and regular +expressions in general are covered in a number of books, some of which have +copious examples. Jeffrey Friedl's "Mastering Regular Expressions", published +by O'Reilly, covers regular expressions in great detail. This description of +PCRE2's regular expressions is intended as reference material. +.P +This document discusses the regular expression patterns that are supported by +PCRE2 when its main matching function, \fBpcre2_match()\fP, is used. PCRE2 also +has an alternative matching function, \fBpcre2_dfa_match()\fP, which matches +using a different algorithm that is not Perl-compatible. Some of the features +discussed below are not available when DFA matching is used. The advantages and +disadvantages of the alternative function, and how it differs from the normal +function, are discussed in the +.\" HREF +\fBpcre2matching\fP +.\" +page. +. +. 
+.SH "EBCDIC CHARACTER CODES" +.rs +.sp +Most computers use ASCII or Unicode for encoding characters, and PCRE2 assumes +this by default. However, it can be compiled to run in an environment that uses +the EBCDIC code, which is the case for some IBM mainframe operating systems. In +the sections below, character code values are ASCII or Unicode; in an EBCDIC +environment these characters may have different code values, and there are no +code points greater than 255. Differences in behaviour when PCRE2 is running in +an EBCDIC environment are described in the section +.\" HTML +.\" +"EBCDIC environments" +.\" +below, which you can ignore unless you really are in an EBCDIC environment. +. +. +.SH "SPECIAL START-OF-PATTERN ITEMS" +.rs +.sp +A number of options that can be passed to \fBpcre2_compile()\fP can also be set +by special items at the start of a pattern. These are not Perl-compatible, but +are provided to make these options accessible to pattern writers who are not +able to change the program that processes the pattern. Any number of these +items may appear, but they must all be together right at the start of the +pattern string, and the letters must be in upper case. +. +. +.SS "UTF support" +.rs +.sp +In the 8-bit and 16-bit PCRE2 libraries, characters may be coded either as +single code units, or as multiple UTF-8 or UTF-16 code units. UTF-32 can be +specified for the 32-bit library, in which case it constrains the character +values to valid Unicode code points. To process UTF strings, PCRE2 must be +built to include Unicode support (which is the default). When using UTF strings +you must either call the compiling function with one or both of the PCRE2_UTF +or PCRE2_MATCH_INVALID_UTF options, or the pattern must start with the special +sequence (*UTF), which is equivalent to setting the relevant PCRE2_UTF. How +setting a UTF mode affects pattern matching is mentioned in several places +below. 
There is also a summary of features in the +.\" HREF +\fBpcre2unicode\fP +.\" +page. +.P +Some applications that allow their users to supply patterns may wish to +restrict them to non-UTF data for security reasons. If the PCRE2_NEVER_UTF +option is passed to \fBpcre2_compile()\fP, (*UTF) is not allowed, and its +appearance in a pattern causes an error. +. +. +.SS "Unicode property support" +.rs +.sp +Another special sequence that may appear at the start of a pattern is (*UCP). +This has the same effect as setting the PCRE2_UCP option: it causes sequences +such as \ed and \ew to use Unicode properties to determine character types, +instead of recognizing only characters with codes less than 256 via a lookup +table. It also causes upper/lower casing operations to use Unicode properties +for characters with code points greater than 127, even when UTF is not set. +These behaviours can be changed within the pattern; see the section entitled +.\" HTML +.\" +"Internal Option Setting" +.\" +below. +.P +Some applications that allow their users to supply patterns may wish to +restrict them for security reasons. If the PCRE2_NEVER_UCP option is passed to +\fBpcre2_compile()\fP, (*UCP) is not allowed, and its appearance in a pattern +causes an error. +. +. +.SS "Locking out empty string matching" +.rs +.sp +Starting a pattern with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) has the same effect +as passing the PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART option to whichever +matching function is subsequently called to match the pattern. These options +lock out the matching of empty strings, either entirely, or only at the start +of the subject. +. +. +.SS "Disabling auto-possessification" +.rs +.sp +If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting +the PCRE2_NO_AUTO_POSSESS option, or calling \fBpcre2_set_optimize()\fP with +a PCRE2_AUTO_POSSESS_OFF directive. This stops PCRE2 from making quantifiers +possessive when what follows cannot match the repeated item. 
For example, by +default a+b is treated as a++b. For more details, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. +.SS "Disabling start-up optimizations" +.rs +.sp +If a pattern starts with (*NO_START_OPT), it has the same effect as setting the +PCRE2_NO_START_OPTIMIZE option, or calling \fBpcre2_set_optimize()\fP with +a PCRE2_START_OPTIMIZE_OFF directive. This disables several optimizations for +quickly reaching "no match" results. For more details, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. +.SS "Disabling automatic anchoring" +.rs +.sp +If a pattern starts with (*NO_DOTSTAR_ANCHOR), it has the same effect as +setting the PCRE2_NO_DOTSTAR_ANCHOR option, or calling \fBpcre2_set_optimize()\fP +with a PCRE2_DOTSTAR_ANCHOR_OFF directive. This disables optimizations that +apply to patterns whose top-level branches all start with .* (match any number +of arbitrary characters). For more details, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +. +.SS "Disabling JIT compilation" +.rs +.sp +If a pattern that starts with (*NO_JIT) is successfully compiled, an attempt by +the application to apply the JIT optimization by calling +\fBpcre2_jit_compile()\fP is ignored. +. +. +.SS "Setting match resource limits" +.rs +.sp +The \fBpcre2_match()\fP function contains a counter that is incremented every +time it goes round its main loop. The caller of \fBpcre2_match()\fP can set a +limit on this counter, which therefore limits the amount of computing resource +used for a match. The maximum depth of nested backtracking can also be limited; +this indirectly restricts the amount of heap memory that is used, but there is +also an explicit memory limit that can be set. +.P +These facilities are provided to catch runaway matches that are provoked by +patterns with huge matching trees. A common example is a pattern with nested +unlimited repeats applied to a long string that does not match. 
When one of +these limits is reached, \fBpcre2_match()\fP gives an error return. The limits +can also be set by items at the start of the pattern of the form +.sp + (*LIMIT_HEAP=d) + (*LIMIT_MATCH=d) + (*LIMIT_DEPTH=d) +.sp +where d is any number of decimal digits. However, the value of the setting must +be less than the value set (or defaulted) by the caller of \fBpcre2_match()\fP +for it to have any effect. In other words, the pattern writer can lower the +limits set by the programmer, but not raise them. If there is more than one +setting of one of these limits, the lower value is used. The heap limit is +specified in kibibytes (units of 1024 bytes). +.P +Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is +still recognized for backwards compatibility. +.P +The heap limit applies only when the \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP interpreters are used for matching. It does not apply +to JIT. The match limit is used (but in a different way) when JIT is being +used, or when \fBpcre2_dfa_match()\fP is called, to limit computing resource +usage by those matching functions. The depth limit is ignored by JIT but is +relevant for DFA matching, which uses function recursion for recursions within +the pattern and for lookaround assertions and atomic groups. In this case, the +depth limit controls the depth of such recursion. +. +. +.\" HTML +.SS "Newline conventions" +.rs +.sp +PCRE2 supports six different conventions for indicating line breaks in +strings: a single CR (carriage return) character, a single LF (linefeed) +character, the two-character sequence CRLF, any of the three preceding, any +Unicode newline sequence, or the NUL character (binary zero). The +.\" HREF +\fBpcre2api\fP +.\" +page has +.\" HTML +.\" +further discussion +.\" +about newlines, and shows how to set the newline convention when calling +\fBpcre2_compile()\fP. 
+.P +It is also possible to specify a newline convention by starting a pattern +string with one of the following sequences: +.sp + (*CR) carriage return + (*LF) linefeed + (*CRLF) carriage return, followed by linefeed + (*ANYCRLF) any of the three above + (*ANY) all Unicode newline sequences + (*NUL) the NUL character (binary zero) +.sp +These override the default and the options given to the compiling function. For +example, on a Unix system where LF is the default newline sequence, the pattern +.sp + (*CR)a.b +.sp +changes the convention to CR. That pattern matches "a\enb" because LF is no +longer a newline. If more than one of these settings is present, the last one +is used. +.P +The newline convention affects where the circumflex and dollar assertions are +true. It also affects the interpretation of the dot metacharacter when +PCRE2_DOTALL is not set, and the behaviour of \eN when not followed by an +opening brace. However, it does not affect what the \eR escape sequence +matches. By default, this is any Unicode newline sequence, for Perl +compatibility. However, this can be changed; see the next section and the +description of \eR in the section entitled +.\" HTML +.\" +"Newline sequences" +.\" +below. A change of \eR setting can be combined with a change of newline +convention. +. +. +.SS "Specifying what \eR matches" +.rs +.sp +It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. This effect can also be achieved by starting a pattern with +(*BSR_ANYCRLF). For completeness, (*BSR_UNICODE) is also recognized, +corresponding to PCRE2_BSR_UNICODE. +. +. +.SH "CHARACTERS AND METACHARACTERS" +.rs +.sp +A regular expression is a pattern that is matched against a subject string from +left to right. Most characters stand for themselves in a pattern, and match the +corresponding characters in the subject. 
As a trivial example, the pattern +.sp + The quick brown fox +.sp +matches a portion of a subject string that is identical to itself. When +caseless matching is specified (the PCRE2_CASELESS option or (?i) within the +pattern), letters are matched independently of case. Note that there are two +ASCII characters, K and S, that, in addition to their lower case ASCII +equivalents, are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F +(long S) respectively when either PCRE2_UTF or PCRE2_UCP is set, unless the +PCRE2_EXTRA_CASELESS_RESTRICT option is in force (either passed to +\fBpcre2_compile()\fP or set by (*CASELESS_RESTRICT) or (?r) within the +pattern). If the PCRE2_EXTRA_TURKISH_CASING option is in force (either passed +to \fBpcre2_compile()\fP or set by (*TURKISH_CASING) within the pattern), then +the 'i' letters are matched according to Turkish and Azeri languages. +.P +The power of regular expressions comes from the ability to include wild cards, +character classes, alternatives, and repetitions in the pattern. These are +encoded in the pattern by the use of \fImetacharacters\fP, which do not stand +for themselves but instead are interpreted in some special way. +.P +There are two different sets of metacharacters: those that are recognized +anywhere in the pattern except within square brackets, and those that are +recognized within square brackets. Outside square brackets, the metacharacters +are as follows: +.sp + \e general escape character with several uses + ^ assert start of string (or line, in multiline mode) + $ assert end of string (or line, in multiline mode) + . match any character except newline (by default) + [ start character class definition + | start of alternative branch + ( start group or control verb + ) end group or control verb + * 0 or more quantifier + + 1 or more quantifier; also "possessive quantifier" + ? 
0 or 1 quantifier; also quantifier minimizer + { potential start of min/max quantifier +.sp +Brace characters { and } are also used to enclose data for constructions such +as \eg{2} or \ek{name}. In almost all uses of braces, space and/or horizontal +tab characters that follow { or precede } are allowed and are ignored. In the +case of quantifiers, they may also appear before or after the comma. The +exception to this is \eu{...} which is an ECMAScript compatibility feature +that is recognized only when the PCRE2_EXTRA_ALT_BSUX option is set. ECMAScript +does not ignore such white space; it causes the item to be interpreted as +literal. +.P +Part of a pattern that is in square brackets is called a "character class". In +a character class the only metacharacters are: +.sp + \e general escape character + ^ negate the class, but only if the first character + - indicates character range + [ POSIX character class (if followed by POSIX syntax) + ] terminates the character class +.sp +If a pattern is compiled with the PCRE2_EXTENDED option, most white space in +the pattern, other than in a character class, within a \eQ...\eE sequence, or +between a # outside a character class and the next newline, inclusive, is +ignored. An escaping backslash can be used to include a white space or a # +character as part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the +same applies, but in addition unescaped space and horizontal tab characters are +ignored inside a character class. Note: only these two characters are ignored, +not the full set of pattern white space characters that are ignored outside a +character class. Option settings can be changed within a pattern; see the +section entitled +.\" HTML +.\" +"Internal Option Setting" +.\" +below. +.P +The following sections describe the use of each of the metacharacters. +. +. +.SH BACKSLASH +.rs +.sp +The backslash character has several uses. 
Firstly, if it is followed by a +character that is not a digit or a letter, it takes away any special meaning +that character may have. This use of backslash as an escape character applies +both inside and outside character classes. +.P +For example, if you want to match a * character, you must write \e* in the +pattern. This escaping action applies whether or not the following character +would otherwise be interpreted as a metacharacter, so it is always safe to +precede a non-alphanumeric with backslash to specify that it stands for itself. +In particular, if you want to match a backslash, you write \e\e. +.P +Only ASCII digits and letters have any special meaning after a backslash. All +other characters (in particular, those whose code points are greater than 127) +are treated as literals. +.P +If you want to treat all characters in a sequence as literals, you can do so by +putting them between \eQ and \eE. Note that this includes white space even when +the PCRE2_EXTENDED option is set so that most other white space is ignored. The +behaviour is different from Perl in that $ and @ are handled as literals in +\eQ...\eE sequences in PCRE2, whereas in Perl, $ and @ cause variable +interpolation. Also, Perl does "double-quotish backslash interpolation" on any +backslashes between \eQ and \eE which, its documentation says, "may lead to +confusing results". PCRE2 treats a backslash between \eQ and \eE just like any +other character. Note the following examples: +.sp + Pattern PCRE2 matches Perl matches +.sp +.\" JOIN + \eQabc$xyz\eE abc$xyz abc followed by the + contents of $xyz + \eQabc\e$xyz\eE abc\e$xyz abc\e$xyz + \eQabc\eE\e$\eQxyz\eE abc$xyz abc$xyz + \eQA\eB\eE A\eB A\eB + \eQ\e\eE \e \e\eE +.sp +The \eQ...\eE sequence is recognized both inside and outside character classes. +An isolated \eE that is not preceded by \eQ is ignored. 
If \eQ is not followed +by \eE later in the pattern, the literal interpretation continues to the end of +the pattern (that is, \eE is assumed at the end). If the isolated \eQ is inside +a character class, this causes an error, because the character class is then +not terminated by a closing square bracket. +.P +Another difference from Perl is that any appearance of \eQ or \eE inside what +might otherwise be a quantifier causes PCRE2 not to recognize the sequence as a +quantifier. Perl recognizes a quantifier if (redundantly) either of the numbers +is inside \eQ...\eE, but not if the separating comma is. When not recognized as +a quantifier a sequence such as {\eQ1\eE,2} is treated as the literal string +"{1,2}". +. +. +.\" HTML +.SS "Non-printing characters" +.rs +.sp +A second use of backslash provides a way of encoding non-printing characters +in patterns in a visible manner. There is no restriction on the appearance of +non-printing characters in a pattern, but when a pattern is being prepared by +text editing, it is often easier to use one of the following escape sequences +instead of the binary character it represents. In an ASCII or Unicode +environment, these escapes are as follows: +.sp + \ea alarm, that is, the BEL character (hex 07) + \ecx "control-x", where x is a non-control ASCII character + \ee escape (hex 1B) + \ef form feed (hex 0C) + \en linefeed (hex 0A) + \er carriage return (hex 0D) (but see below) + \et tab (hex 09) + \e0dd character with octal code 0dd + \eddd character with octal code ddd, or back reference + \eo{ddd..} character with octal code ddd.. + \exhh character with hex code hh + \ex{hhh..} character with hex code hhh.. + \eN{U+hhh..} character with Unicode hex code point hhh.. +.sp +A description of how back references work is given +.\" HTML +.\" +later, +.\" +following the discussion of +.\" HTML +.\" +parenthesized groups. 
+.\" +.P +By default, after \ex that is not followed by {, one or two hexadecimal +digits are read (letters can be in upper or lower case). If the character that +follows \ex is neither { nor a hexadecimal digit, an error occurs. This is +different from Perl's default behaviour, which generates a NUL character, but +is in line with the behaviour of Perl's 'strict' mode in re. +.P +Any number of hexadecimal digits may appear between \ex{ and }. If a character +other than a hexadecimal digit appears between \ex{ and }, or if there is no +terminating }, an error occurs. +.P +Characters whose code points are less than 256 can be defined by either of the +two syntaxes for \ex or by an octal sequence. There is no difference in the way +they are handled. For example, \exdc is exactly the same as \ex{dc} or \e334. +However, using the braced versions does make such sequences easier to read. +.P +Support is available for some ECMAScript (aka JavaScript) escape sequences via +two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \ex followed +by { is not recognized. Only if \ex is followed by two hexadecimal digits is it +recognized as a character escape. Otherwise it is interpreted as a literal "x" +character. In this mode, support for code points greater than 255 is provided +by \eu, which must be followed by four hexadecimal digits; otherwise it is +interpreted as a literal "u" character. +.P +PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition, +\eu{hhh..} is recognized as the character specified by hexadecimal code point. +There may be any number of hexadecimal digits, but unlike other places that +also use curly brackets, spaces are not allowed and would result in the string +being interpreted as a literal. This syntax is from ECMAScript 6. +.P +The \eN{U+hhh..} escape sequence is recognized only when PCRE2 is operating in +UTF mode. Perl also uses \eN{name} to specify characters by Unicode name; PCRE2 +does not support this. 
Note that when \eN is not followed by an opening brace +(curly bracket) it has an entirely different meaning, matching any character +that is not a newline. +.P +There are some legacy applications where the escape sequence \er is expected to +match a newline. If the PCRE2_EXTRA_ESCAPED_CR_IS_LF option is set, \er in a +pattern is converted to \en so that it matches a LF (linefeed) instead of a CR +(carriage return) character. +.P +An error occurs if \ec is not followed by a character whose ASCII code point +is in the range 32 to 126. The precise effect of \ecx is as follows: if x is a +lower case letter, it is converted to upper case. Then bit 6 of the character +(hex 40) is inverted. Thus \ecA to \ecZ become hex 01 to hex 1A (A is 41, Z is +5A), but \ec{ becomes hex 3B ({ is 7B), and \ec; becomes hex 7B (; is 3B). If +the code unit following \ec has a code point less than 32 or greater than 126, +a compile-time error occurs. +.P +For differences in the way some escapes behave in EBCDIC environments, +see section +.\" HTML +.\" +"EBCDIC environments" +.\" +below. +. +. +.SS "Octal escapes and back references" +.rs +.sp +The escape \eo must be followed by a sequence of octal digits, enclosed in +braces. An error occurs if this is not the case. This escape provides a way of +specifying character code points as octal numbers greater than 0777, and it +also allows octal numbers and backreferences to be unambiguously distinguished. +.P +If braces are not used, after \e0 up to two further octal digits are read. +However, if the PCRE2_EXTRA_NO_BS0 option is set, at least one more octal digit +must follow \e0 (use \e00 to generate a NUL character). Make sure you supply +two digits after the initial zero if the pattern character that follows is +itself an octal digit. +.P +Inside a character class, when a backslash is followed by any octal digit, up +to three octal digits are read to generate a code point. Any subsequent digits +stand for themselves. 
The sequences \e8 and \e9 are treated as the literal +characters "8" and "9". +.P +Outside a character class, Perl's handling of a backslash followed by a digit +other than 0 is complicated by ambiguity, and Perl has changed over time, +causing PCRE2 also to change. From PCRE2 release 10.45 there is an option +called PCRE2_EXTRA_PYTHON_OCTAL that causes PCRE2 to use Python's unambiguous +rules. The next two subsections describe the two sets of rules. +.P +For greater clarity and unambiguity, it is best to avoid following \e by a +digit greater than zero. Instead, use \eo{...} or \ex{...} to specify numerical +character code points, and \eg{...} to specify backreferences. +. +. +.SS "Perl rules for non-class backslash 1-9" +.rs +.sp +All the digits that follow the backslash are read as a decimal number. If the +number is less than 10, begins with the digit 8 or 9, or if there are at least +that many previous capture groups in the expression, the entire sequence is +taken as a back reference. Otherwise, up to three octal digits are read to form +a character code. For example: +.sp + \e040 is another way of writing an ASCII space +.\" JOIN + \e40 is the same, provided there are fewer than 40 + previous capture groups + \e7 is always a backreference +.\" JOIN + \e11 might be a backreference, or another way of + writing a tab + \e011 is always a tab + \e0113 is a tab followed by the character "3" +.\" JOIN + \e113 might be a backreference, otherwise the + character with octal code 113 +.\" JOIN + \e377 might be a backreference, otherwise + the value 255 (decimal) + \e81 is always a backreference +.sp +Note that octal values of 100 or greater that are specified using this syntax +must not be introduced by a leading zero, because no more than three octal +digits are ever read. +. +. 
+.SS "Python rules for non-class backslash 1-9" +.rs +.sp +If there are at least three octal digits after the backslash, exactly three are +read as an octal code point number, but the value must be no greater than +\e377, even in modes where higher code point values are supported. Any +subsequent digits stand for themselves. If there are fewer than three octal +digits, the sequence is taken as a decimal back reference. Thus, for example, +\e12 is always a back reference, independent of how many captures there are in +the pattern. An error is generated for a reference to a non-existent capturing +group. +. +. +.SS "Constraints on character values" +.rs +.sp +Characters that are specified using octal or hexadecimal numbers are +limited to certain values, as follows: +.sp + 8-bit non-UTF mode no greater than 0xff + 16-bit non-UTF mode no greater than 0xffff + 32-bit non-UTF mode no greater than 0xffffffff + All UTF modes no greater than 0x10ffff and a valid code point +.sp +Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the +so-called "surrogate" code points). The check for these can be disabled by the +caller of \fBpcre2_compile()\fP by setting the option +PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8 +and UTF-32 modes, because these values are not representable in UTF-16. +. +. +.SS "Escape sequences in character classes" +.rs +.sp +All the sequences that define a single character value can be used both inside +and outside character classes. In addition, inside a character class, \eb is +interpreted as the backspace character (hex 08). +.P +When not followed by an opening brace, \eN is not allowed in a character class. +\eB, \eR, and \eX are not special inside a character class. Like other +unrecognized alphabetic escape sequences, they cause an error. Outside a +character class, these sequences have different meanings. +. +. 
+.SS "Unsupported escape sequences" +.rs +.sp +In Perl, the sequences \eF, \el, \eL, \eu, and \eU are recognized by its string +handler and used to modify the case of following characters. By default, PCRE2 +does not support these escape sequences in patterns. However, if either of the +PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \eU matches a "U" +character, and \eu can be used to define a character by code point, as +described above. +. +. +.SS "Absolute and relative backreferences" +.rs +.sp +The sequence \eg followed by a signed or unsigned number, optionally enclosed +in braces, is an absolute or relative backreference. A named backreference +can be coded as \eg{name}. Backreferences are discussed +.\" HTML +.\" +later, +.\" +following the discussion of +.\" HTML +.\" +parenthesized groups. +.\" +. +. +.SS "Absolute and relative subroutine calls" +.rs +.sp +For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for referencing a capture group as a subroutine. Details are discussed +.\" HTML +.\" +later. +.\" +Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP +synonymous. The former is a backreference; the latter is a +.\" HTML +.\" +subroutine +.\" +call. +. +. 
+.\" HTML +.SS "Generic character types" +.rs +.sp +Another use of backslash is for specifying generic character types: +.sp + \ed any decimal digit + \eD any character that is not a decimal digit + \eh any horizontal white space character + \eH any character that is not a horizontal white space character + \eN any character that is not a newline + \es any white space character + \eS any character that is not a white space character + \ev any vertical white space character + \eV any character that is not a vertical white space character + \ew any "word" character + \eW any "non-word" character +.sp +The \eN escape sequence has the same meaning as +.\" HTML +.\" +the "." metacharacter +.\" +when PCRE2_DOTALL is not set, but setting PCRE2_DOTALL does not change the +meaning of \eN. Note that when \eN is followed by an opening brace it has a +different meaning. See the section entitled +.\" HTML +.\" +"Non-printing characters" +.\" +above for details. Perl also uses \eN{name} to specify characters by Unicode +name; PCRE2 does not support this. +.P +Each pair of lower and upper case escape sequences partitions the complete set +of characters into two disjoint sets. Any given character matches one, and only +one, of each pair. The sequences can appear both inside and outside character +classes. They each match one character of the appropriate type. If the current +matching point is at the end of the subject string, all of them fail, because +there is no character to match. +.P +The default \es characters are HT (9), LF (10), VT (11), FF (12), CR (13), and +space (32), which are defined as white space in the "C" locale. This list may +vary if locale-specific matching is taking place. For example, in some locales +the "non-breaking space" character (\exA0) is recognized as white space, and in +others the VT character is not. +.P +A "word" character is an underscore or any character that is a letter or digit. 
+By default, the definition of letters and digits is controlled by PCRE2's +low-valued character tables, and may vary if locale-specific matching is taking +place (see +.\" HTML +.\" +"Locale support" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +page). For example, in a French locale such as "fr_FR" in Unix-like systems, +or "french" in Windows, some character codes greater than 127 are used for +accented letters, and these are then matched by \ew. The use of locales with +Unicode is discouraged. +.P +By default, characters whose code points are greater than 127 never match \ed, +\es, or \ew, and always match \eD, \eS, and \eW, although this may be different +for characters in the range 128-255 when locale-specific matching is happening. +These escape sequences retain their original meanings from before Unicode +support was available, mainly for efficiency reasons. If the PCRE2_UCP option +is set, the behaviour is changed so that Unicode properties are used to +determine character types, as follows: +.sp + \ed any character that matches \ep{Nd} (decimal digit) + \es any character that matches \ep{Z} or \eh or \ev + \ew any character that matches \ep{L}, \ep{N}, \ep{Mn}, or \ep{Pc} +.sp +The addition of \ep{Mn} (non-spacing mark) and the replacement of an explicit +test for underscore with a test for \ep{Pc} (connector punctuation) happened in +PCRE2 release 10.43. This brings PCRE2 into line with Perl. +.P +The upper case escapes match the inverse sets of characters. Note that \ed +matches only decimal digits, whereas \ew matches any Unicode digit, as well as +other character categories. Note also that PCRE2_UCP affects \eb, and +\eB because they are defined in terms of \ew and \eW. Matching these sequences +is noticeably slower when PCRE2_UCP is set. +.P +The effect of PCRE2_UCP on any one of these escape sequences can be negated by +the options PCRE2_EXTRA_ASCII_BSD, PCRE2_EXTRA_ASCII_BSS, and +PCRE2_EXTRA_ASCII_BSW, respectively. 
These options can be set and reset within +a pattern by means of an internal option setting +.\" HTML +.\" +(see below). +.\" +.P +The sequences \eh, \eH, \ev, and \eV, in contrast to the other sequences, which +match only ASCII characters by default, always match a specific list of code +points, whether or not PCRE2_UCP is set. The horizontal space characters are: +.sp + U+0009 Horizontal tab (HT) + U+0020 Space + U+00A0 Non-break space + U+1680 Ogham space mark + U+180E Mongolian vowel separator + U+2000 En quad + U+2001 Em quad + U+2002 En space + U+2003 Em space + U+2004 Three-per-em space + U+2005 Four-per-em space + U+2006 Six-per-em space + U+2007 Figure space + U+2008 Punctuation space + U+2009 Thin space + U+200A Hair space + U+202F Narrow no-break space + U+205F Medium mathematical space + U+3000 Ideographic space +.sp +The vertical space characters are: +.sp + U+000A Linefeed (LF) + U+000B Vertical tab (VT) + U+000C Form feed (FF) + U+000D Carriage return (CR) + U+0085 Next line (NEL) + U+2028 Line separator + U+2029 Paragraph separator +.sp +In 8-bit, non-UTF-8 mode, only the characters with code points less than 256 +are relevant. +. +. +.\" HTML +.SS "Newline sequences" +.rs +.sp +Outside a character class, by default, the escape sequence \eR matches any +Unicode newline sequence. In 8-bit non-UTF-8 mode \eR is equivalent to the +following: +.sp + (?>\er\en|\en|\ex0b|\ef|\er|\ex85) +.sp +This is an example of an "atomic group", details of which are given +.\" HTML +.\" +below. +.\" +This particular group matches either the two-character sequence CR followed by +LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, +U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next +line, U+0085). Because this is an atomic group, the two-character sequence is +treated as a single unit that cannot be split. 
+.P +In other modes, two additional characters whose code points are greater than 255 +are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029). +Unicode support is not needed for these characters to be recognized. +.P +It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the +complete set of Unicode line endings) by setting the option PCRE2_BSR_ANYCRLF +at compile time. (BSR is an abbreviation for "backslash R".) This can be made +the default when PCRE2 is built; if this is the case, the other behaviour can +be requested via the PCRE2_BSR_UNICODE option. It is also possible to specify +these settings by starting a pattern string with one of the following +sequences: +.sp + (*BSR_ANYCRLF) CR, LF, or CRLF only + (*BSR_UNICODE) any Unicode newline sequence +.sp +These override the default and the options given to the compiling function. +Note that these special settings, which are not Perl-compatible, are recognized +only at the very start of a pattern, and that they must be in upper case. If +more than one of them is present, the last one is used. They can be combined +with a change of newline convention; for example, a pattern can start with: +.sp + (*ANY)(*BSR_ANYCRLF) +.sp +They can also be combined with the (*UTF) or (*UCP) special sequences. Inside a +character class, \eR is treated as an unrecognized escape sequence, and causes +an error. +. +. +.\" HTML +.SS Unicode character properties +.rs +.sp +When PCRE2 is built with Unicode support (the default), three additional escape +sequences that match characters with specific properties are available. They +can be used in any mode, though in 8-bit and 16-bit non-UTF modes these +sequences are of course limited to testing characters whose code points are +less than U+0100 or U+10000, respectively. In 32-bit non-UTF mode, code points +greater than 0x10ffff (the Unicode limit) may be encountered. These are all +treated as being in the Unknown script and with an unassigned type. 
+.P +Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \ed and \ew do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +.P +The extra escape sequences that provide property support are: +.sp + \ep{\fIxx\fP} a character with the \fIxx\fP property + \eP{\fIxx\fP} a character without the \fIxx\fP property + \eX a Unicode extended grapheme cluster +.sp +For compatibility with Perl, negation can be specified by including a +circumflex between the opening brace and the property. For example, \ep{^Lu} is +the same as \eP{Lu}. +.P +In accordance with Unicode's "loose matching" rules, ASCII white space +characters, hyphens, and underscores are ignored in the properties represented +by \fIxx\fP above. As well as the space character, ASCII white space can be +tab, linefeed, vertical tab, formfeed, or carriage return. +.P +Some properties are specified as a name only; others as a name and a value, +separated by a colon or an equals sign. The names and values consist of ASCII +letters and digits (with one Perl-specific exception, see below). They are not +case sensitive. Note, however, that the escapes themselves, \ep and \eP, +\fIare\fP case sensitive. There are abbreviations for many names. The following +examples are all equivalent: +.sp + \ep{bidiclass=al} + \ep{BC=al} + \ep{ Bidi_Class : AL } + \ep{ Bi-di class = Al } + \eP{ ^ Bi-di class = Al } +.sp +There is support for Unicode script names, Unicode general category properties, +"Any", which matches any character (including newline), Bidi_Class, a number of +binary (yes/no) properties, and some special PCRE2 properties (described +.\" HTML +.\" +below). +.\" +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. 
Note that \eP{Any} does not match any characters, so always causes a +match failure. +. +. +. +.SS "Script properties for \ep and \eP" +.rs +.sp +There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas +\ep{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized and, as for all property specifications, an +equals sign is an alternative to the colon. If a script name is given without a +property type, for example, \ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl +changed to this interpretation at release 5.26 and PCRE2 changed at release +10.40. +.P +Unassigned characters (and in non-UTF 32-bit mode, characters with code points +greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not +part of an identified script are lumped together as "Common". The current list +of recognized script names and their 4-character abbreviations can be obtained +by running this command: +.sp + pcre2test -LS +.sp +. +. +. +.SS "The general category property for \ep and \eP" +.rs +.sp +Each character has exactly one Unicode general category property, specified by +a two-letter abbreviation. If only one letter is specified with \ep or \eP, it +includes all the general category properties that start with that letter. 
In +this case, in the absence of negation, the curly brackets in the escape +sequence are optional; these two examples have the same effect: +.sp + \ep{L} + \epL +.sp +The following general category property codes are supported: +.sp + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate +.sp + L Letter + Lc Cased letter + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter +.sp + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark +.sp + N Number + Nd Decimal number + Nl Letter number + No Other number +.sp + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation +.sp + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol +.sp + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator +.sp +Perl originally used the name L& for the Lc property. This is still supported +by Perl, but discouraged. PCRE2 also still supports it. This property matches +any character that has the Lu, Ll, or Lt property, in other words, any letter +that is not classified as a modifier or "other". From release 10.45 of PCRE2 +the properties Lu, Ll, and Lt are all treated as Lc when case-independent +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. The +other properties are not affected by caseless matching. +.P +The Cs (Surrogate) property applies only to characters whose code points are in +the range U+D800 to U+DFFF. These characters are no different to any other +character when PCRE2 is not in UTF mode (using the 16-bit or 32-bit library). +However, they are not valid in Unicode strings and so cannot be tested by PCRE2 +in UTF mode, unless UTF validity checking has been turned off (see the +discussion of PCRE2_NO_UTF_CHECK in the +.\" HREF +\fBpcre2api\fP +.\" +page). 
+.P +The long synonyms for property names that Perl supports (such as \ep{Letter}) +are not supported by PCRE2, nor is it permitted to prefix any of these +properties with "Is". +.P +No character that is in the Unicode table has the Cn (unassigned) property. +Instead, this property is assumed for any code point that is not in the +Unicode table. +. +. +.SS "Binary (yes/no) properties for \ep and \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +.sp +. +. +.SS "The Bidi_Class property for \ep and \eP" +.rs +.sp + \ep{Bidi_Class:} matches a character with the given class + \ep{BC:} matches a character with the given class +.sp +The recognized classes are: +.sp + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space +.sp +As in all property specifications, an equals sign may be used instead of a +colon and the class names are case-insensitive. Only the short names listed +above are recognized; PCRE2 does not at present support any long alternatives. +. +. +.SS Extended grapheme clusters +.rs +.sp +The \eX escape matches any number of Unicode characters that form an "extended +grapheme cluster", and treats the sequence as an atomic group +.\" HTML +.\" +(see below). 
+.\" +Unicode supports various kinds of composite character by giving each character +a grapheme breaking property, and having rules that use these properties to +define the boundaries of extended grapheme clusters. The rules are defined in +Unicode Standard Annex 29, "Unicode Text Segmentation". Unicode 11.0.0 +abandoned the use of some previous properties that had been used for emojis. +Instead it introduced various emoji-specific properties. PCRE2 uses only the +Extended Pictographic property. +.P +\eX always matches at least one character. Then it decides whether to add +additional characters according to the following rules for ending a cluster: +.P +1. End at the end of the subject string. +.P +2. Do not end between CR and LF; otherwise end after any control character. +.P +3. Do not break Hangul (a Korean script) syllable sequences. Hangul characters +are of five types: L, V, T, LV, and LVT. An L character may be followed by an +L, V, LV, or LVT character; an LV or V character may be followed by a V or T +character; an LVT or T character may be followed only by a T character. +.P +4. Do not end before extending characters or spacing marks or the zero-width +joiner (ZWJ) character. Characters with the "mark" property always have the +"extend" grapheme breaking property. +.P +5. Do not end after prepend characters. +.P +6. Do not end within emoji modifier sequences or emoji ZWJ (zero-width +joiner) sequences. An emoji ZWJ sequence consists of a character with the +Extended_Pictographic property, optionally followed by one or more characters +with the Extend property, followed by the ZWJ character, followed by another +Extended_Pictographic character. +.P +7. Do not break within emoji flag sequences. That is, do not break between +regional indicator (RI) characters if there are an odd number of RI characters +before the break point. +.P +8. Otherwise, end the cluster. +. +. 
+.\" HTML +.SS PCRE2's additional properties +.rs +.sp +As well as the standard Unicode properties described above, PCRE2 supports four +more that make it possible to convert traditional escape sequences such as \ew +and \es to use Unicode properties. PCRE2 uses these non-standard, non-Perl +properties internally when PCRE2_UCP is set. However, they may also be used +explicitly. These properties are: +.sp + Xan Any alphanumeric character + Xps Any POSIX space character + Xsp Any Perl space character + Xwd Any Perl "word" character +.sp +Xan matches characters that have either the L (letter) or the N (number) +property. Xps matches the characters tab, linefeed, vertical tab, form feed, or +carriage return, and any other character that has the Z (separator) property +(this includes the space character). Xsp is the same as Xps; in PCRE1 it used +to exclude vertical tab, for Perl compatibility, but Perl changed. Xwd matches +the same characters as Xan, plus those that match Mn (non-spacing mark) or Pc +(connector punctuation, which includes underscore). +.P +There is another non-standard property, Xuc, which matches any character that +can be represented by a Universal Character Name in C++ and other programming +languages. These are the characters $, @, ` (grave accent), and all characters +with Unicode code points greater than or equal to U+00A0, except for the +surrogates U+D800 to U+DFFF. Note that most base (ASCII) characters are +excluded. (Universal Character Names are of the form \euHHHH or \eUHHHHHHHH +where H is a hexadecimal digit. Note that the Xuc property does not match these +sequences but the characters that they represent.) +. +. +.\" HTML +.SS "Resetting the match start" +.rs +.sp +In normal use, the escape sequence \eK causes any previously matched characters +not to be included in the final matched sequence that is returned. For example, +the pattern: +.sp + foo\eKbar +.sp +matches "foobar", but reports that it has matched "bar". 
\eK does not interact +with anchoring in any way. The pattern: +.sp + ^foo\eKbar +.sp +matches only when the subject begins with "foobar" (in single line mode), +though it again reports the matched string as "bar". This feature is similar to +a lookbehind assertion +.\" HTML +.\" +(described below), +.\" +but the part of the pattern that precedes \eK is not constrained to match a +limited number of characters, as is required for a lookbehind assertion. The +use of \eK does not interfere with the setting of +.\" HTML +.\" +captured substrings. +.\" +For example, when the pattern +.sp + (foo)\eKbar +.sp +matches "foobar", the first substring is still set to "foo". +.P +From version 5.32.0 Perl forbids the use of \eK in lookaround assertions. From +release 10.38 PCRE2 also forbids this by default. However, the +PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK option can be used when calling +\fBpcre2_compile()\fP to re-enable the previous behaviour. When this option is +set, \eK is acted upon when it occurs inside positive assertions, but is +ignored in negative assertions. Note that when a pattern such as (?=ab\eK) +matches, the reported start of the match can be greater than the end of the +match. Using \eK in a lookbehind assertion at the start of a pattern can also +lead to odd effects. For example, consider this pattern: +.sp + (?<=\eKfoo)bar +.sp +If the subject is "foobar", a call to \fBpcre2_match()\fP with a starting +offset of 3 succeeds and reports the matching string as "foobar", that is, the +start of the reported match is earlier than where the match started. +. +. +.\" HTML +.SS "Simple assertions" +.rs +.sp +The final use of backslash is for certain simple assertions. An assertion +specifies a condition that has to be met at a particular point in a match, +without consuming any characters from the subject string. The use of +groups for more complicated assertions is described +.\" HTML +.\" +below. 
+.\" +The backslashed assertions are: +.sp + \eb matches at a word boundary + \eB matches when not at a word boundary + \eA matches at the start of the subject + \eZ matches at the end of the subject + also matches before a newline at the end of the subject + \ez matches only at the end of the subject + \eG matches at the first matching position in the subject +.sp +Inside a character class, \eb has a different meaning; it matches the backspace +character. If any other of these assertions appears in a character class, an +"invalid escape sequence" error is generated. +.P +A word boundary is a position in the subject string where the current character +and the previous character do not both match \ew or \eW (i.e. one matches +\ew and the other matches \eW), or the start or end of the string if the +first or last character matches \ew, respectively. When PCRE2 is built with +Unicode support, the meanings of \ew and \eW can be changed by setting the +PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2 +nor Perl has a separate "start of word" or "end of word" metasequence. However, +whatever follows \eb normally determines which it is. For example, the fragment +\eba matches "a" at the start of a word. +.P +The \eA, \eZ, and \ez assertions differ from the traditional circumflex and +dollar (described in the next section) in that they only ever match at the very +start and end of the subject string, whatever options are set. Thus, they are +independent of multiline mode. These three assertions are not affected by the +PCRE2_NOTBOL or PCRE2_NOTEOL options, which affect only the behaviour of the +circumflex and dollar metacharacters. However, if the \fIstartoffset\fP +argument of \fBpcre2_match()\fP is non-zero, indicating that matching is to +start at a point other than the beginning of the subject, \eA can never match. 
+The difference between \eZ and \ez is that \eZ matches before a newline at the +end of the string as well as at the very end, whereas \ez matches only at the +end. +.P +The \eG assertion is true only when the current matching position is at the +start point of the matching process, as specified by the \fIstartoffset\fP +argument of \fBpcre2_match()\fP. It differs from \eA when the value of +\fIstartoffset\fP is non-zero. By calling \fBpcre2_match()\fP multiple times +with appropriate arguments, you can mimic Perl's /g option, and it is in this +kind of implementation where \eG can be useful. +.P +Note, however, that PCRE2's implementation of \eG, being true at the starting +character of the matching process, is subtly different from Perl's, which +defines it as true at the end of the previous match. In Perl, these can be +different when the previously matched string was empty. Because PCRE2 does just +one match at a time, it cannot reproduce this behaviour. +.P +If all the alternatives of a pattern begin with \eG, the expression is anchored +to the starting match position, and the "anchored" flag is set in the compiled +regular expression. +. +. +.SH "CIRCUMFLEX AND DOLLAR" +.rs +.sp +The circumflex and dollar metacharacters are zero-width assertions. That is, +they test for a particular condition being true without consuming any +characters from the subject string. These two metacharacters are concerned with +matching the starts and ends of lines. If the newline convention is set so that +only the two-character sequence CRLF is recognized as a newline, isolated CR +and LF characters are treated as ordinary data characters, and are not +recognized as newlines. +.P +Outside a character class, in the default matching mode, the circumflex +character is an assertion that is true only if the current matching point is at +the start of the subject string. 
If the \fIstartoffset\fP argument of +\fBpcre2_match()\fP is non-zero, or if PCRE2_NOTBOL is set, circumflex can +never match if the PCRE2_MULTILINE option is unset. Inside a character class, +circumflex has an entirely different meaning +.\" HTML +.\" +(see below). +.\" +.P +Circumflex need not be the first character of the pattern if a number of +alternatives are involved, but it should be the first thing in each alternative +in which it appears if the pattern is ever to match that branch. If all +possible alternatives start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is said to be an +"anchored" pattern. (There are also other constructs that can cause a pattern +to be anchored.) +.P +The dollar character is an assertion that is true only if the current matching +point is at the end of the subject string, or immediately before a newline at +the end of the string (by default), unless PCRE2_NOTEOL is set. Note, however, +that it does not actually match the newline. Dollar need not be the last +character of the pattern if a number of alternatives are involved, but it +should be the last item in any branch in which it appears. Dollar has no +special meaning in a character class. +.P +The meaning of dollar can be changed so that it matches only at the very end of +the string, by setting the PCRE2_DOLLAR_ENDONLY option at compile time. This +does not affect the \eZ assertion. +.P +The meanings of the circumflex and dollar metacharacters are changed if the +PCRE2_MULTILINE option is set. When this is the case, a dollar character +matches before any newlines in the string, as well as at the very end, and a +circumflex matches immediately after internal newlines as well as at the start +of the subject string. It does not match after a newline that ends the string, +for compatibility with Perl. However, this can be changed by setting the +PCRE2_ALT_CIRCUMFLEX option. 
+.P +For example, the pattern /^abc$/ matches the subject string "def\enabc" (where +\en represents a newline) in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode because all branches start with +^ are not anchored in multiline mode, and a match for circumflex is possible +when the \fIstartoffset\fP argument of \fBpcre2_match()\fP is non-zero. The +PCRE2_DOLLAR_ENDONLY option is ignored if PCRE2_MULTILINE is set. +.P +When the newline convention (see +.\" HTML +.\" +"Newline conventions" +.\" +below) recognizes the two-character sequence CRLF as a newline, this is +preferred, even if the single characters CR and LF are also recognized as +newlines. For example, if the newline convention is "any", a multiline mode +circumflex matches before "xyz" in the string "abc\er\enxyz" rather than after +CR, even though CR on its own is a valid newline. (It also matches at the very +start of the string, of course.) +.P +Note that the sequences \eA, \eZ, and \ez can be used to match the start and +end of the subject in both modes, and if all branches of a pattern start with +\eA it is always anchored, whether or not PCRE2_MULTILINE is set. +. +. +.\" HTML +.SH "FULL STOP (PERIOD, DOT) AND \eN" +.rs +.sp +Outside a character class, a dot in the pattern matches any one character in +the subject string except (by default) a character that signifies the end of a +line. One or more characters may be specified as line terminators (see +.\" HTML +.\" +"Newline conventions" +.\" +above). +.P +Dot never matches a single line-ending character. When the two-character +sequence CRLF is the only line ending, dot does not match CR if it is +immediately followed by LF, but otherwise it matches all characters (including +isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurrences +of CR of LF match dot. When all Unicode line endings are being recognized, dot +does not match CR or LF or any of the other line ending characters. 
+.P +The behaviour of dot with regard to newlines can be changed. If the +PCRE2_DOTALL option is set, a dot matches any one character, without exception. +If the two-character sequence CRLF is present in the subject string, it takes +two dots to match it. +.P +The handling of dot is entirely independent of the handling of circumflex and +dollar, the only relationship being that they both involve newlines. Dot has no +special meaning in a character class. +.P +The escape sequence \eN when not followed by an opening brace behaves like a +dot, except that it is not affected by the PCRE2_DOTALL option. In other words, +it matches any character except one that signifies the end of a line. +.P +When \eN is followed by an opening brace it has a different meaning. See the +section entitled +.\" HTML +.\" +"Non-printing characters" +.\" +above for details. Perl also uses \eN{name} to specify characters by Unicode +name; PCRE2 does not support this. +. +. +.SH "MATCHING A SINGLE CODE UNIT" +.rs +.sp +Outside a character class, the escape sequence \eC matches any one code unit, +whether or not a UTF mode is set. In the 8-bit library, one code unit is one +byte; in the 16-bit library it is a 16-bit unit; in the 32-bit library it is a +32-bit unit. Unlike a dot, \eC always matches line-ending characters. The +feature is provided in Perl in order to match individual bytes in UTF-8 mode, +but it is unclear how it can usefully be used. +.P +Because \eC breaks up characters into individual code units, matching one unit +with \eC in UTF-8 or UTF-16 mode means that the rest of the string may start +with a malformed UTF character. This has undefined results, because PCRE2 +assumes that it is matching character by character in a valid UTF string (by +default it checks the subject string's validity at the start of processing +unless the PCRE2_NO_UTF_CHECK or PCRE2_MATCH_INVALID_UTF option is used). 
+.P +An application can lock out the use of \eC by setting the +PCRE2_NEVER_BACKSLASH_C option when compiling a pattern. It is also possible to +build PCRE2 with the use of \eC permanently disabled. +.P +PCRE2 does not allow \eC to appear in lookbehind assertions +.\" HTML +.\" +(described below) +.\" +in UTF-8 or UTF-16 modes, because this would make it impossible to calculate +the length of the lookbehind. Neither the alternative matching function +\fBpcre2_dfa_match()\fP nor the JIT optimizer support \eC in these UTF modes. +The former gives a match-time error; the latter fails to optimize and so the +match is always run using the interpreter. +.P +In the 32-bit library, however, \eC is always supported (when not explicitly +locked out) because it always matches a single code unit, whether or not UTF-32 +is specified. +.P +In general, the \eC escape sequence is best avoided. However, one way of using +it that avoids the problem of malformed UTF-8 or UTF-16 characters is to use a +lookahead to check the length of the next character, as in this pattern, which +could be used with a UTF-8 string (ignore white space and line breaks): +.sp + (?| (?=[\ex00-\ex7f])(\eC) | + (?=[\ex80-\ex{7ff}])(\eC)(\eC) | + (?=[\ex{800}-\ex{ffff}])(\eC)(\eC)(\eC) | + (?=[\ex{10000}-\ex{1fffff}])(\eC)(\eC)(\eC)(\eC)) +.sp +In this example, a group that starts with (?| resets the capturing parentheses +numbers in each alternative (see +.\" HTML +.\" +"Duplicate Group Numbers" +.\" +below). The assertions at the start of each branch check the next UTF-8 +character for values whose encoding uses 1, 2, 3, or 4 bytes, respectively. The +character's individual bytes are then captured by the appropriate number of +\eC groups. +. +. +.\" HTML +.SH "SQUARE BRACKETS AND CHARACTER CLASSES" +.rs +.sp +An opening square bracket introduces a character class, terminated by a closing +square bracket. A closing square bracket on its own is not special by default. 
+If a closing square bracket is required as a member of the class, it should be +the first data character in the class (after an initial circumflex, if present) +or escaped with a backslash. This means that, by default, an empty class cannot +be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing +square bracket at the start does end the (empty) class. +.P +A character class matches a single character in the subject. A matched +character must be in the set of characters defined by the class, unless the +first character in the class definition is a circumflex, in which case the +subject character must not be in the set defined by the class. If a circumflex +is actually required as a member of the class, ensure it is not the first +character, or escape it with a backslash. +.P +For example, the character class [aeiou] matches any lower case English vowel, +whereas [^aeiou] matches all other characters. Note that a circumflex is just a +convenient notation for specifying the characters that are in the class by +enumerating those that are not. A class that starts with a circumflex is not an +assertion; it still consumes a character from the subject string, and therefore +it fails to match if the current pointer is at the end of the string. +.P +Characters in a class may be specified by their code points using \eo, \ex, or +\eN{U+hh..} in the usual way. When caseless matching is set, any letters in a +class represent both their upper case and lower case versions, so for example, +a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not +match "A", whereas a caseful version would. Note that there are two ASCII +characters, K and S, that, in addition to their lower case ASCII equivalents, +are case-equivalent with Unicode U+212A (Kelvin sign) and U+017F (long S) +respectively when either PCRE2_UTF or PCRE2_UCP is set. 
If you do not want +these ASCII/non-ASCII case equivalences, you can suppress them by setting +PCRE2_EXTRA_CASELESS_RESTRICT, either as an option in a compile context, or by +including (*CASELESS_RESTRICT) or (?r) within a pattern. +.P +Characters that might indicate line breaks are never treated in any special way +when matching character classes, whatever line-ending sequence is in use, and +whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A +class such as [^a] always matches one of these characters. +.P +The generic character type escape sequences \ed, \eD, \eh, \eH, \ep, \eP, \es, +\eS, \ev, \eV, \ew, and \eW may appear in a character class, and add the +characters that they match to the class. For example, [\edABCDEF] matches any +hexadecimal digit. In UTF modes, the PCRE2_UCP option affects the meanings of +\ed, \es, \ew and their upper case partners, just as it does when they appear +outside a character class, as described in the section entitled +.\" HTML +.\" +"Generic character types" +.\" +above. The escape sequence \eb has a different meaning inside a character +class; it matches the backspace character. The sequences \eB, \eR, and \eX are +not special inside a character class. Like any other unrecognized escape +sequences, they cause an error. The same is true for \eN when not followed by +an opening brace. +.P +The minus (hyphen) character can be used to specify a range of characters in a +character class. For example, [d-m] matches any letter between d and m, +inclusive. If a minus character is required in a class, it must be escaped with +a backslash or appear in a position where it cannot be interpreted as +indicating a range, typically as the first or last character in the class, +or immediately after a range. For example, [b-d-z] matches letters in the range +b to d, a hyphen character, or z. 
+.P +There is some special treatment for alphabetic ranges in EBCDIC environments; +see the section +.\" HTML +.\" +"EBCDIC environments" +.\" +below. +.P +Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or before or after a character type escape such as \ed or \eH. +However, unless the hyphen is the last character in the class, Perl outputs a +warning in its warning mode, as this is most likely a user error. As PCRE2 has +no facility for warning, an error is given in these cases. +.P +It is not possible to have the literal character "]" as the end character of a +range. A pattern such as [W-]46] is interpreted as a class of two characters +("W" and "-") followed by a literal string "46]", so it would match "W46]" or +"-46]". However, if the "]" is escaped with a backslash it is interpreted as +the end of a range, so [W-\e]46] is interpreted as a class containing a range +and two other characters. The octal or hexadecimal representation of "]" can +also be used to end a range. +.P +Ranges normally include all code points between the start and end characters, +inclusive. They can also be used for code points specified numerically, for +example [\e000-\e037]. Ranges can include any characters that are valid for the +current mode. In any UTF mode, the so-called "surrogate" characters (those +whose code points lie between 0xd800 and 0xdfff inclusive) may not be specified +explicitly by default (the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables +this check). However, ranges such as [\ex{d7ff}-\ex{e000}], which include the +surrogates, are always permitted. +.P +If a range that includes letters is used when caseless matching is set, it +matches the letters in either case. For example, [W-c] is equivalent to +[][\e\e^_`wxyzabc], matched caselessly, and in a non-UTF mode, if character +tables for a French locale are in use, [\exc8-\excb] matches accented E +characters in both cases. 
+.P +A circumflex can conveniently be used with the upper case character types to +specify a more restricted set of characters than the matching lower case type. +For example, the class [^\eW_] matches any letter or digit, but not underscore, +whereas [\ew] includes underscore. A positive character class should be read as +"something OR something OR ..." and a negative class as "NOT something AND NOT +something AND NOT ...". +.P +The metacharacters that are recognized in character classes are backslash, +hyphen (when it can be interpreted as specifying a range), circumflex +(only at the start), and the terminating closing square bracket. An opening +square bracket is also special when it can be interpreted as introducing a +POSIX class (see +.\" HTML +.\" +"Posix character classes" +.\" +below), or a special compatibility feature (see +.\" HTML +.\" +"Compatibility feature for word boundaries" +.\" +below. Escaping any non-alphanumeric character in a class turns it into a +literal, whether or not it would otherwise be a metacharacter. +. +. +.SH "PERL EXTENDED CHARACTER CLASSES" +.rs +.sp +From release 10.45 PCRE2 supports Perl's (?[...]) extended character class +syntax. This can be used to perform set operations such as intersection on +character classes. +.P +The syntax permitted within (?[...]) is quite different to ordinary character +classes. Inside the extended class, there is an expression syntax consisting of +"atoms", operators, and ordinary parentheses "()" used for grouping. Such +classes always have the Perl /xx modifier (PCRE2 option PCRE2_EXTENDED_MORE) +turned on within them. This means that literal space and tab characters are +ignored everywhere in the class. +.P +The allowed atoms are individual characters specified by escape sequences such +as \en or \ex{123}, character types such as \ed, POSIX classes such as +[:alpha:], and nested ordinary (non-extended) character classes. For example, +in (?[\ed & [...]]) the nested class [...] 
follows the usual rules for ordinary +character classes, in which parentheses are not metacharacters, and character +literals and ranges are permitted. +.P +Character literals and ranges may not appear outside a nested ordinary +character class because they are not atoms in the extended syntax. The extended +syntax does not introduce any additional escape sequences, so (?[\ey]) is an +unknown escape, as it would be in [\ey]. +.P +In the extended syntax, ^ does not negate a class (except within an +ordinary class nested inside an extended class); it is instead a binary +operator. +.P +The binary operators are "&" (intersection), "|" or "+" (union), "-" +(subtraction) and "^" (symmetric difference). These are left-associative and +"&" has higher (tighter) precedence, while the others have equal lower +precedence. The one prefix unary operator is "!" (complement), with highest +precedence. +. +. +.SH "UTS#18 EXTENDED CHARACTER CLASSES" +.rs +.sp +The PCRE2_ALT_EXTENDED_CLASS option enables an alternative to Perl's (?[...]) +syntax, allowing instead extended class behaviour inside ordinary [...] +character classes. This altered syntax for [...] classes is loosely described +by the Unicode standard UTS#18. The PCRE2_ALT_EXTENDED_CLASS option does not +prevent use of (?[...]) classes; it just changes the meaning of all +[...] classes that are not nested inside a Perl (?[...]) class. +.P +Firstly, in ordinary Perl [...] syntax, an expression such as "[a[]" is a +character class with two literal characters "a" and "[", but in UTS#18 extended +classes the "[" character becomes an additional metacharacter within classes, +denoting the start of a nested class, so a literal "[" must be escaped as "\e[". +.P +Secondly, within the UTS#18 extended syntax, there are operators "||", "&&", +"--" and "~~" which denote character class union, intersection, subtraction, +and symmetric difference respectively. 
In standard Perl syntax, these would +simply be needlessly-repeated literals (except for "--" which could be the +start or end of a range). In UTS#18 extended classes these operators can be used +in constructs such as [\ep{L}--[QW]] for "Unicode letters, other than Q and W". +A literal "-" at the start or end of a range must be escaped, so while "[--1]" +in Perl syntax is the range from hyphen to "1", it must be escaped as "[\e--1]" +in UTS#18 extended classes. +.P +Unlike Perl's (?[...]) extended classes, the PCRE2_EXTENDED_MORE option to +ignore space and tab characters is not automatically enabled for UTS#18 +extended classes, but it is honoured if set. +.P +Extended UTS#18 classes can be nested, and nested classes are themselves +extended classes (unlike Perl, where nested classes must be simple classes). +For example, [\ep{L}&&[\ep{Thai}||\ep{Greek}]] matches any letter that is in +the Thai or Greek scripts. Note that this means that no special grouping +characters (such as the parentheses used in Perl's (?[...]) class syntax) are +needed. +.P +Individual class items (literal characters, literal ranges, properties such as +\ed or \ep{...}, and nested classes) can be combined by juxtaposition or by an +operator. Juxtaposition is the implicit union operator, and binds more tightly +than any explicit operator. Thus a sequence of literals and/or ranges behaves +as if it is enclosed in square brackets. For example, [A-Z0-9&&[^E8]] is the +same as [[A-Z0-9]&&[^E8]], which matches any upper case alphanumeric character +except "E" or "8". +.P +Precedence between the explicit operators is not defined, so mixing operators +is a syntax error. For example, [A&&B--C] is an error, but [A&&[B--C]] is +valid. 
+.P +This is an emerging syntax which is being adopted gradually across the regex +ecosystem: for example JavaScript adopted the "/v" flag in ECMAScript 2024; +Python's "re" module reserves the syntax for future use with a FutureWarning +for unescaped use of "[" as a literal within character classes. Due to UTS#18 +providing insufficient guidance, engines interpret the syntax differently. +Rust's "regex" crate and Python's "regex" PyPi module both implement UTS#18 +extended classes, but with slight incompatibilities ([A||B&&C] is parsed as +[A||[B&&C]] in Python's "regex" but as [[A||B]&&C] in Rust's "regex"). +.P +PCRE2's syntax adds syntax restrictions similar to ECMASCript's /v flag, so +that all the UTS#18 extended classes accepted as valid by PCRE2 have the +property that they are interpreted either with the same behaviour, or as +invalid, by all other major engines. Please file an issue if you are aware of +cross-engine differences in behaviour between PCRE2 and another major engine. +. +. +.\" HTML +.SH "POSIX CHARACTER CLASSES" +.rs +.sp +Perl supports the POSIX notation for character classes. This uses names +enclosed by [: and :] within the enclosing square brackets. PCRE2 also supports +this notation, in both ordinary and extended classes. For example, +.sp + [01[:alpha:]%] +.sp +matches "0", "1", any alphabetic character, or "%". 
The supported class names +are: +.sp + alnum letters and digits + alpha letters + ascii character codes 0 - 127 + blank space or tab only + cntrl control characters + digit decimal digits (same as \ed) + graph printing characters, excluding space + lower lower case letters + print printing characters, including space + punct printing characters, excluding letters and digits and space + space white space (the same as \es from PCRE2 8.34) + upper upper case letters + word "word" characters (same as \ew) + xdigit hexadecimal digits +.sp +The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), +and space (32). If locale-specific matching is taking place, the list of space +characters may be different; there may be fewer or more of them. "Space" and +\es match the same set of characters, as do "word" and \ew. +.P +The name "word" is a Perl extension, and "blank" is a GNU extension from Perl +5.8. Another Perl extension is negation, which is indicated by a ^ character +after the colon. For example, +.sp + [12[:^digit:]] +.sp +matches "1", "2", or any non-digit. PCRE2 (and Perl) also recognize the POSIX +syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not +supported, and an error is given if they are encountered. +.P +By default, characters with values greater than 127 do not match any of the +POSIX character classes, although this may be different for characters in the +range 128-255 when locale-specific matching is happening. However, in UCP mode, +unless certain options are set (see below), some of the classes are changed so +that Unicode character properties are used. 
This is achieved by replacing +POSIX classes with other sequences, as follows: +.sp + [:alnum:] becomes \ep{Xan} + [:alpha:] becomes \ep{L} + [:blank:] becomes \eh + [:cntrl:] becomes \ep{Cc} + [:digit:] becomes \ep{Nd} + [:lower:] becomes \ep{Ll} + [:space:] becomes \ep{Xps} + [:upper:] becomes \ep{Lu} + [:word:] becomes \ep{Xwd} +.sp +Negated versions, such as [:^alpha:] use \eP instead of \ep. Four other POSIX +classes are handled specially in UCP mode: +.TP 10 +[:graph:] +This matches characters that have glyphs that mark the page when printed. In +Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf +properties, except for: +.sp + U+061C Arabic Letter Mark + U+180E Mongolian Vowel Separator + U+2066 - U+2069 Various "isolate"s +.sp +.TP 10 +[:print:] +This matches the same characters as [:graph:] plus space characters that are +not controls, that is, characters with the Zs property. +.TP 10 +[:punct:] +This matches all characters that have the Unicode P (punctuation) property, +plus those characters with code points less than 256 that have the S (Symbol) +property. +.TP 10 +[:xdigit:] +In addition to the ASCII hexadecimal digits, this also matches the "fullwidth" +versions of those characters, whose Unicode code points start at U+FF10. This +is a change that was made in PCRE2 release 10.43 for Perl compatibility. +.P +The other POSIX classes are unchanged by PCRE2_UCP, and match only characters +with code points less than 256. +.P +There are two options that can be used to restrict the POSIX classes to ASCII +characters when PCRE2_UCP is set. The option PCRE2_EXTRA_ASCII_DIGIT affects +just [:digit:] and [:xdigit:]. Within a pattern, this can be set and unset by +(?aT) and (?-aT). The PCRE2_EXTRA_ASCII_POSIX option disables UCP processing +for all POSIX classes, including [:digit:] and [:xdigit:]. Within a pattern, +(?aP) and (?-aP) set and unset both these options for consistency. +. +. 
+.\" HTML +.SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES" +.rs +.sp +In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly +syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of +word". PCRE2 treats these items as follows: +.sp + [[:<:]] is converted to \eb(?=\ew) + [[:>:]] is converted to \eb(?<=\ew) +.sp +Only these exact character sequences are recognized. A sequence such as +[a[:<:]b] provokes error for an unrecognized POSIX class name. This support is +not compatible with Perl. It is provided to help migrations from other +environments, and is best not used in any new patterns. Note that \eb matches +at the start and the end of a word (see +.\" HTML +.\" +"Simple assertions" +.\" +above), and in a Perl-style pattern the preceding or following character +normally shows which is wanted, without the need for the assertions that are +used above in order to give exactly the POSIX behaviour. Note also that the +PCRE2_UCP option changes the meaning of \ew (and therefore \eb) by default, so +it also affects these POSIX sequences. +. +. +.SH "VERTICAL BAR" +.rs +.sp +Vertical bar characters are used to separate alternative patterns. For example, +the pattern +.sp + gilbert|sullivan +.sp +matches either "gilbert" or "sullivan". Any number of alternatives may appear, +and an empty alternative is permitted (matching the empty string). The matching +process tries each alternative in turn, from left to right, and the first one +that succeeds is used. If the alternatives are within a group +.\" HTML +.\" +(defined below), +.\" +"succeeds" means matching the rest of the main pattern as well as the +alternative in the group. +. +. +.\" HTML +.SH "INTERNAL OPTION SETTING" +.rs +.sp +The settings of several options can be changed within a pattern by a sequence +of letters enclosed between "(?" and ")". The following are Perl-compatible, +and are described in detail in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. 
The option letters are: +.sp + i for PCRE2_CASELESS + m for PCRE2_MULTILINE + n for PCRE2_NO_AUTO_CAPTURE + s for PCRE2_DOTALL + x for PCRE2_EXTENDED + xx for PCRE2_EXTENDED_MORE +.sp +For example, (?im) sets caseless, multiline matching. It is also possible to +unset these options by preceding the relevant letters with a hyphen, for +example (?-im). The two "extended" options are not independent; unsetting +either one cancels the effects of both of them. +.P +A combined setting and unsetting such as (?im-sx), which sets PCRE2_CASELESS +and PCRE2_MULTILINE while unsetting PCRE2_DOTALL and PCRE2_EXTENDED, is also +permitted. Only one hyphen may appear in the options string. If a letter +appears both before and after the hyphen, the option is unset. An empty options +setting "(?)" is allowed. Needless to say, it has no effect. +.P +If the first character following (? is a circumflex, it causes all of the above +options to be unset. Letters may follow the circumflex to cause some options to +be re-instated, but a hyphen may not appear. +.P +Some PCRE2-specific options can be changed by the same mechanism using these +pairs or individual letters: +.sp + aD for PCRE2_EXTRA_ASCII_BSD + aS for PCRE2_EXTRA_ASCII_BSS + aW for PCRE2_EXTRA_ASCII_BSW + aP for PCRE2_EXTRA_ASCII_POSIX and PCRE2_EXTRA_ASCII_DIGIT + aT for PCRE2_EXTRA_ASCII_DIGIT + r for PCRE2_EXTRA_CASELESS_RESTRICT + J for PCRE2_DUPNAMES + U for PCRE2_UNGREEDY +.sp +However, except for 'r', these are not unset by (?^), which is equivalent to +(?-imnrsx). If 'a' is not followed by any of the upper case letters shown +above, it sets (or unsets) all the ASCII options. +.P +PCRE2_EXTRA_ASCII_DIGIT has no additional effect when PCRE2_EXTRA_ASCII_POSIX +is set, but including it in (?aP) means that (?-aP) suppresses all ASCII +restrictions for POSIX classes. 
+.P +When one of these option changes occurs at top level (that is, not inside group +parentheses), the change applies until a subsequent change, or the end of the +pattern. An option change within a group (see below for a description of +groups) affects only that part of the group that follows it. At the end of the +group these options are reset to the state they were before the group. For +example, +.sp + (a(?i)b)c +.sp +matches abc and aBc and no other strings (assuming PCRE2_CASELESS is not set +externally). Any changes made in one alternative do carry on into subsequent +branches within the same group. For example, +.sp + (a(?i)b|c) +.sp +matches "ab", "aB", "c", and "C", even though when matching "C" the first +branch is abandoned before the option setting. This is because the effects of +option settings happen at compile time. There would be some very weird +behaviour otherwise. +.P +As a convenient shorthand, if any option settings are required at the start of +a non-capturing group (see the next section), the option letters may +appear between the "?" and the ":". Thus the two patterns +.sp + (?i:saturday|sunday) + (?:(?i)saturday|sunday) +.sp +match exactly the same set of strings. +.P +\fBNote:\fP There are other PCRE2-specific options, applying to the whole +pattern, which can be set by the application when the compiling function is +called. In addition, the pattern can contain special leading sequences such as +(*CRLF) to override what the application has set or what has been defaulted. +Details are given in the section entitled +.\" HTML +.\" +"Newline sequences" +.\" +above. There are also the (*UTF) and (*UCP) leading sequences that can be used +to set UTF and Unicode property modes; they are equivalent to setting the +PCRE2_UTF and PCRE2_UCP options, respectively. However, the application can set +the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, which lock out the use of the +(*UTF) and (*UCP) sequences. +. +. 
+.\" HTML +.SH GROUPS +.rs +.sp +Groups are delimited by parentheses (round brackets), which can be nested. +Turning part of a pattern into a group does two things: +.sp +1. It localizes a set of alternatives. For example, the pattern +.sp + cat(aract|erpillar|) +.sp +matches "cataract", "caterpillar", or "cat". Without the parentheses, it would +match "cataract", "erpillar" or an empty string. +.sp +2. It creates a "capture group". This means that, when the whole pattern +matches, the portion of the subject string that matched the group is passed +back to the caller, separately from the portion that matched the whole pattern. +(This applies only to the traditional matching function; the DFA matching +function does not support capturing.) +.P +Opening parentheses are counted from left to right (starting from 1) to obtain +numbers for capture groups. For example, if the string "the red king" is +matched against the pattern +.sp + the ((red|white) (king|queen)) +.sp +the captured substrings are "red king", "red", and "king", and are numbered 1, +2, and 3, respectively. +.P +The fact that plain parentheses fulfil two functions is not always helpful. +There are often times when grouping is required without capturing. If an +opening parenthesis is followed by a question mark and a colon, the group +does not do any capturing, and is not counted when computing the number of any +subsequent capture groups. For example, if the string "the white queen" +is matched against the pattern +.sp + the ((?:red|white) (king|queen)) +.sp +the captured substrings are "white queen" and "queen", and are numbered 1 and +2. The maximum number of capture groups is 65535. +.P +As a convenient shorthand, if any option settings are required at the start of +a non-capturing group, the option letters may appear between the "?" and the +":". Thus the two patterns +.sp + (?i:saturday|sunday) + (?:(?i)saturday|sunday) +.sp +match exactly the same set of strings. 
Because alternative branches are tried +from left to right, and options are not reset until the end of the group is +reached, an option setting in one branch does affect subsequent branches, so +the above patterns match "SUNDAY" as well as "Saturday". +. +. +.\" HTML +.SH "DUPLICATE GROUP NUMBERS" +.rs +.sp +Perl 5.10 introduced a feature whereby each alternative in a group uses the +same numbers for its capturing parentheses. Such a group starts with (?| and is +itself a non-capturing group. For example, consider this pattern: +.sp + (?|(Sat)ur|(Sun))day +.sp +Because the two alternatives are inside a (?| group, both sets of capturing +parentheses are numbered one. Thus, when the pattern matches, you can look +at captured substring number one, whichever alternative matched. This construct +is useful when you want to capture part, but not all, of one of a number of +alternatives. Inside a (?| group, parentheses are numbered as usual, but the +number is reset at the start of each branch. The numbers of any capturing +parentheses that follow the whole group start after the highest number used in +any branch. The following example is taken from the Perl documentation. The +numbers underneath show in which buffer the captured content will be stored. +.sp + # before ---------------branch-reset----------- after + / ( a ) (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x + # 1 2 2 3 2 3 4 +.sp +A backreference to a capture group uses the most recent value that is set for +the group. The following pattern matches "abcabc" or "defdef": +.sp + /(?|(abc)|(def))\e1/ +.sp +In contrast, a subroutine call to a capture group always refers to the +first one in the pattern with the given number. The following pattern matches +"abcabc" or "defabc": +.sp + /(?|(abc)|(def))(?1)/ +.sp +A relative reference such as (?-1) is no different: it is just a convenient way +of computing an absolute group number. 
+.P +If a +.\" HTML +.\" +condition test +.\" +for a group's having matched refers to a non-unique number, the test is +true if any group with that number has matched. +.P +An alternative approach to using this "branch reset" feature is to use +duplicate named groups, as described in the next section. +. +. +.SH "NAMED CAPTURE GROUPS" +.rs +.sp +Identifying capture groups by number is simple, but it can be very hard to keep +track of the numbers in complicated patterns. Furthermore, if an expression is +modified, the numbers may change. To help with this difficulty, PCRE2 supports +the naming of capture groups. This feature was not added to Perl until release +5.10. Python had the feature earlier, and PCRE1 introduced it at release 4.0, +using the Python syntax. PCRE2 supports both the Perl and the Python syntax. +.P +In PCRE2, a capture group can be named in one of three ways: (?<name>...) or +(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 128 +code units long. When PCRE2_UTF is not set, they may contain only ASCII +alphanumeric characters and underscores, but must start with a non-digit. When +PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode +letter or Unicode decimal digit. In other words, group names must match one of +these patterns: +.sp + ^[_A-Za-z][_A-Za-z0-9]*\ez when PCRE2_UTF is not set + ^[_\ep{L}][_\ep{L}\ep{Nd}]*\ez when PCRE2_UTF is set +.sp +References to capture groups from other parts of the pattern, such as +.\" HTML +.\" +backreferences, +.\" +.\" HTML +.\" +recursion, +.\" +and +.\" HTML +.\" +conditions, +.\" +can all be made by name as well as by number. +.P +Named capture groups are allocated numbers as well as names, exactly as +if the names were not present. In both PCRE2 and Perl, capture groups +are primarily identified by numbers; any names are just aliases for these +numbers.
The PCRE2 API provides function calls for extracting the complete +name-to-number translation table from a compiled pattern, as well as +convenience functions for extracting captured substrings by name. +.P +\fBWarning:\fP When more than one capture group has the same number, as +described in the previous section, a name given to one of them applies to all +of them. Perl allows identically numbered groups to have different names. +Consider this pattern, where there are two capture groups, both numbered 1: +.sp + (?|(?<AA>aa)|(?<BB>bb)) +.sp +Perl allows this, with both names AA and BB as aliases of group 1. Thus, after +a successful match, both names yield the same value (either "aa" or "bb"). +.P +In an attempt to reduce confusion, PCRE2 does not allow the same group number +to be associated with more than one name. The example above provokes a +compile-time error. However, there is still scope for confusion. Consider this +pattern: +.sp + (?|(?<AA>aa)|(bb)) +.sp +Although the second group number 1 is not explicitly named, the name AA is +still an alias for any group 1. Whether the pattern matches "aa" or "bb", a +reference by name to group AA yields the matched string. +.P +By default, a name must be unique within a pattern, except that duplicate names +are permitted for groups with the same number, for example: +.sp + (?|(?<AA>aa)|(?<AA>bb)) +.sp +The duplicate name constraint can be disabled by setting the PCRE2_DUPNAMES +option at compile time, or by the use of (?J) within the pattern, as described +in the section entitled +.\" HTML +.\" +"Internal Option Setting" +.\" +above. +.P +Duplicate names can be useful for patterns where only one instance of the named +capture group can match. Suppose you want to match the name of a weekday, +either as a 3-letter abbreviation or as the full name, and in both cases you +want to extract the abbreviation.
This pattern (ignoring the line breaks) does +the job: +.sp + (?J) + (?<DN>Mon|Fri|Sun)(?:day)?| + (?<DN>Tue)(?:sday)?| + (?<DN>Wed)(?:nesday)?| + (?<DN>Thu)(?:rsday)?| + (?<DN>Sat)(?:urday)? +.sp +There are five capture groups, but only one is ever set after a match. The +convenience functions for extracting the data by name return the substring for +the first (and in this example, the only) group of that name that matched. This +saves searching to find which numbered group it was. (An alternative way of +solving this problem is to use a "branch reset" group, as described in the +previous section.) +.P +If you make a backreference to a non-unique named group from elsewhere in the +pattern, the groups to which the name refers are checked in the order in which +they appear in the overall pattern. The first one that is set is used for the +reference. For example, this pattern matches both "foofoo" and "barbar" but not +"foobar" or "barfoo": +.sp + (?J)(?:(?<n>foo)|(?<n>bar))\ek<n> +.sp +.P +If you make a subroutine call to a non-unique named group, the one that +corresponds to the first occurrence of the name is used. In the absence of +duplicate numbers this is the one with the lowest number. +.P +If you use a named reference in a condition +test (see the +.\" +.\" HTML +.\" +section about conditions +.\" +below), either to check whether a capture group has matched, or to check for +recursion, all groups with the same name are tested. If the condition is true +for any one of them, the overall condition is true. This is the same behaviour +as testing by number. For further details of the interfaces for handling named +capture groups, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +.
+.SH REPETITION +.rs +.sp +Repetition is specified by quantifiers, which may follow any one of these +items: +.sp + a literal data character + the dot metacharacter + the \eC escape sequence + the \eR escape sequence + the \eX escape sequence + any escape sequence that matches a single character + a character class + a backreference + a parenthesized group (including lookaround assertions) + a subroutine call (recursive or otherwise) +.sp +If a quantifier does not follow a repeatable item, an error occurs. The +general repetition quantifier specifies a minimum and maximum number of +permitted matches by giving two numbers in curly brackets (braces), separated +by a comma. The numbers must be less than 65536, and the first must be less +than or equal to the second. For example, +.sp + z{2,4} +.sp +matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special +character. If the second number is omitted, but the comma is present, there is +no upper limit; if the second number and the comma are both omitted, the +quantifier specifies an exact number of required matches. Thus +.sp + [aeiou]{3,} +.sp +matches at least 3 successive vowels, but may match many more, whereas +.sp + \ed{8} +.sp +matches exactly 8 digits. If the first number is omitted, the lower limit is +taken as zero; in this case the upper limit must be present. +.sp + X{,4} is interpreted as X{0,4} +.sp +This is a change in behaviour that happened in Perl 5.34.0 and PCRE2 10.43. In +earlier versions such a sequence was not interpreted as a quantifier. Other +regular expression engines may behave either way. +.P +If the characters that follow an opening brace do not match the syntax of a +quantifier, the brace is taken as a literal character. In particular, this +means that {,} is a literal string of three characters. +.P +Note that not every opening brace is potentially the start of a quantifier +because braces are used in other items such as \eN{U+345} or \ek{name}. 
+.P +In UTF modes, quantifiers apply to characters rather than to individual code +units. Thus, for example, \ex{100}{2} matches two characters, each of +which is represented by a two-byte sequence in a UTF-8 string. Similarly, +\eX{3} matches three Unicode extended grapheme clusters, each of which may be +several code units long (and they may be of different lengths). +.P +The quantifier {0} is permitted, causing the expression to behave as if the +previous item and the quantifier were not present. This may be useful for +capture groups that are referenced as +.\" HTML +.\" +subroutines +.\" +from elsewhere in the pattern (but see also the section entitled +.\" HTML +.\" +"Defining capture groups for use by reference only" +.\" +below). Except for parenthesized groups, items that have a {0} quantifier are +omitted from the compiled pattern. +.P +For convenience, the three most common quantifiers have single-character +abbreviations: +.sp + * is equivalent to {0,} + + is equivalent to {1,} + ? is equivalent to {0,1} +.sp +It is possible to construct infinite loops by following a group that can match +no characters with a quantifier that has no upper limit, for example: +.sp + (a?)* +.sp +Earlier versions of Perl and PCRE1 used to give an error at compile time for +such patterns. However, because there are cases where this can be useful, such +patterns are now accepted, but whenever an iteration of such a group matches no +characters, matching moves on to the next item in the pattern instead of +repeatedly matching an empty string. This does not prevent backtracking into +any of the iterations if a subsequent item fails to match. +.P +By default, quantifiers are "greedy", that is, they match as much as possible +(up to the maximum number of permitted repetitions), without causing the rest +of the pattern to fail. The classic example of where this gives problems is in +trying to match comments in C programs. 
These appear between /* and */ and +within the comment, individual * and / characters may appear. An attempt to +match C comments by applying the pattern +.sp + /\e*.*\e*/ +.sp +to the string +.sp + /* first comment */ not comment /* second comment */ +.sp +fails, because it matches the entire string owing to the greediness of the .* +item. However, if a quantifier is followed by a question mark, it ceases to be +greedy, and instead matches the minimum number of times possible, so the +pattern +.sp + /\e*.*?\e*/ +.sp +does the right thing with C comments. The meaning of the various quantifiers is +not otherwise changed, just the preferred number of matches. Do not confuse +this use of question mark with its use as a quantifier in its own right. +Because it has two uses, it can sometimes appear doubled, as in +.sp + \ed??\ed +.sp +which matches one digit by preference, but can match two if that is the only +way the rest of the pattern matches. +.P +If the PCRE2_UNGREEDY option is set (an option that is not available in Perl), +the quantifiers are not greedy by default, but individual ones can be made +greedy by following them with a question mark. In other words, it inverts the +default behaviour. +.P +When a parenthesized group is quantified with a minimum repeat count that +is greater than 1 or with a limited maximum, more memory is required for the +compiled pattern, in proportion to the size of the minimum or maximum. +.P +If a pattern starts with .* or .{0,} and the PCRE2_DOTALL option (equivalent +to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is +implicitly anchored, because whatever follows will be tried against every +character position in the subject string, so there is no point in retrying the +overall match at any position after the first. PCRE2 normally treats such a +pattern as though it were preceded by \eA. 
+.P +In cases where it is known that the subject string contains no newlines, it is +worth setting PCRE2_DOTALL in order to obtain this optimization, or +alternatively, using ^ to indicate anchoring explicitly. +.P +However, there are some cases where the optimization cannot be used. When .* +is inside capturing parentheses that are the subject of a backreference +elsewhere in the pattern, a match at the start may fail where a later one +succeeds. Consider, for example: +.sp + (.*)abc\e1 +.sp +If the subject is "xyz123abc123" the match point is the fourth character. For +this reason, such a pattern is not implicitly anchored. +.P +Another case where implicit anchoring is not applied is when the leading .* is +inside an atomic group. Once again, a match at the start may fail where a later +one succeeds. Consider this pattern: +.sp + (?>.*?a)b +.sp +It matches "ab" in the subject "aab". The use of the backtracking control verbs +(*PRUNE) and (*SKIP) also disables this optimization. To do so explicitly, +either pass the compile option PCRE2_NO_DOTSTAR_ANCHOR, or call +\fBpcre2_set_optimize()\fP with a PCRE2_DOTSTAR_ANCHOR_OFF directive. +.P +When a capture group is repeated, the value captured is the substring that +matched the final iteration. For example, after +.sp + (tweedle[dume]{3}\es*)+ +.sp +has matched "tweedledum tweedledee" the value of the captured substring is +"tweedledee". However, if there are nested capture groups, the corresponding +captured values may have been set in previous iterations. For example, after +.sp + (a|(b))+ +.sp +matches "aba" the value of the second captured substring is "b". +. +. +.\" HTML +.SH "ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS" +.rs +.sp +With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy") +repetition, failure of what follows normally causes the repeated item to be +re-evaluated to see if a different number of repeats allows the rest of the +pattern to match.
Sometimes it is useful to prevent this, either to change the +nature of the match, or to cause it to fail earlier than it otherwise might, when +the author of the pattern knows there is no point in carrying on. +.P +Consider, for example, the pattern \ed+foo when applied to the subject line +.sp + 123456bar +.sp +After matching all 6 digits and then failing to match "foo", the normal +action of the matcher is to try again with only 5 digits matching the \ed+ +item, and then with 4, and so on, before ultimately failing. "Atomic grouping" +(a term taken from Jeffrey Friedl's book) provides the means for specifying +that once a group has matched, it is not to be re-evaluated in this way. +.P +If we use atomic grouping for the previous example, the matcher gives up +immediately on failing to match "foo" the first time. The notation is a kind of +special parenthesis, starting with (?> as in this example: +.sp + (?>\ed+)foo +.sp +Perl 5.28 introduced an experimental alphabetic form starting with (* which may +be easier to remember: +.sp + (*atomic:\ed+)foo +.sp +This kind of parenthesized group "locks up" the part of the pattern it contains +once it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal. +.P +An alternative description is that a group of this type matches exactly the +string of characters that an identical standalone pattern would match, if +anchored at the current point in the subject string. +.P +Atomic groups are not capture groups. Simple cases such as the above example +can be thought of as a maximizing repeat that must swallow everything it can. +So, while both \ed+ and \ed+? are prepared to adjust the number of digits they +match in order to make the rest of the pattern match, (?>\ed+) can only match +an entire sequence of digits. +.P +Atomic groups in general can of course contain arbitrarily complicated +expressions, and can be nested.
However, when the contents of an atomic +group is just a single repeated item, as in the example above, a simpler +notation, called a "possessive quantifier" can be used. This consists of an +additional + character following a quantifier. Using this notation, the +previous example can be rewritten as +.sp + \ed++foo +.sp +Note that a possessive quantifier can be used with an entire group, for +example: +.sp + (abc|xyz){2,3}+ +.sp +Possessive quantifiers are always greedy; the setting of the PCRE2_UNGREEDY +option is ignored. They are a convenient notation for the simpler forms of +atomic group. However, there is no difference in the meaning of a possessive +quantifier and the equivalent atomic group, though there may be a performance +difference; possessive quantifiers should be slightly faster. +.P +The possessive quantifier syntax is an extension to the Perl 5.8 syntax. +Jeffrey Friedl originated the idea (and the name) in the first edition of his +book. Mike McCloskey liked it, so implemented it when he built Sun's Java +package, and PCRE1 copied it from there. It found its way into Perl at release +5.10. +.P +PCRE2 has an optimization that automatically "possessifies" certain simple +pattern constructs. For example, the sequence A+B is treated as A++B because +there is no point in backtracking into a sequence of A's when B must follow. +This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, by calling +\fBpcre2_set_optimize()\fP with a PCRE2_AUTO_POSSESS_OFF directive, or by +starting the pattern with (*NO_AUTO_POSSESS). +.P +When a pattern contains an unlimited repeat inside a group that can itself be +repeated an unlimited number of times, the use of an atomic group is the only +way to avoid some failing matches taking a very long time indeed. The pattern +.sp + (\eD+|<\ed+>)*[!?] +.sp +matches an unlimited number of substrings that either consist of non-digits, or +digits enclosed in <>, followed by either ! or ?. 
When it matches, it runs +quickly. However, if it is applied to +.sp + aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +.sp +it takes a long time before reporting failure. This is because the string can +be divided between the internal \eD+ repeat and the external * repeat in a +large number of ways, and all have to be tried. (The example uses [!?] rather +than a single character at the end, because both PCRE2 and Perl have an +optimization that allows for fast failure when a single character is used. They +remember the last single character that is required for a match, and fail early +if it is not present in the string.) If the pattern is changed so that it uses +an atomic group, like this: +.sp + ((?>\eD+)|<\ed+>)*[!?] +.sp +sequences of non-digits cannot be broken, and failure happens quickly. +. +. +.\" HTML +.SH "BACKREFERENCES" +.rs +.sp +Outside a character class, a backslash followed by a digit greater than 0 (and +possibly further digits) is a backreference to a capture group earlier (that +is, to its left) in the pattern, provided there have been that many previous +capture groups. +.P +However, if the decimal number following the backslash is less than 8, it is +always taken as a backreference, and causes an error only if there are not that +many capture groups in the entire pattern. In other words, the group that is +referenced need not be to the left of the reference for numbers less than 8. A +"forward backreference" of this type can make sense when a repetition is +involved and the group to the right has participated in an earlier iteration. +.P +It is not possible to have a numerical "forward backreference" to a group whose +number is 8 or more using this syntax because a sequence such as \e50 is +interpreted as a character defined in octal. See the subsection entitled +"Non-printing characters" +.\" HTML +.\" +above +.\" +for further details of the handling of digits following a backslash. 
Other +forms of backreferencing do not suffer from this restriction. In particular, +there is no problem when named capture groups are used (see below). +.P +Another way of avoiding the ambiguity inherent in the use of digits following a +backslash is to use the \eg escape sequence. This escape must be followed by a +signed or unsigned number, optionally enclosed in braces. These examples are +all identical: +.sp + (ring), \e1 + (ring), \eg1 + (ring), \eg{1} +.sp +An unsigned number specifies an absolute reference without the ambiguity that +is present in the older syntax. It is also useful when literal digits follow +the reference. A signed number is a relative reference. Consider this example: +.sp + (abc(def)ghi)\eg{-1} +.sp +The sequence \eg{-1} is a reference to the capture group whose number is one +less than the number of the next group to be started, so in this example (where +the next group would be numbered 3) it is equivalent to \e2, and \eg{-2} would +be equivalent to \e1. Note that if this construct is inside a capture group, +that group is included in the count, so in this example \eg{-2} also refers to +group 1: +.sp + (A)(\eg{-2}B) +.sp +The use of relative references can be helpful in long patterns, and also in +patterns that are created by joining together fragments that contain references +within themselves. +.P +The sequence \eg{+1} is a reference to the next capture group that is started +after this item, and \eg{+2} refers to the one after that, and so on. This kind +of forward reference can be useful in patterns that repeat. Perl does not +support the use of + in this way. +.P +A backreference matches whatever actually most recently matched the capture +group in the current subject string, rather than anything at all that matches +the group (see +.\" HTML +.\" +"Groups as subroutines" +.\" +below for a way of doing that).
So the pattern +.sp + (sens|respons)e and \e1ibility +.sp +matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If caseful matching is in force at the time of the +backreference, the case of letters is relevant. For example, +.sp + ((?i)rah)\es+\e1 +.sp +matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original +capture group is matched caselessly. +.P +There are several different ways of writing backreferences to named capture +groups. The .NET syntax is \ek{name}, the Python syntax is (?P=name), and the +original Perl syntax is \ek<name> or \ek'name'. All of these are now supported +by both Perl and PCRE2. Perl 5.10's unified backreference syntax, in which \eg +can be used for both numeric and named references, is also supported by PCRE2. +We could rewrite the above example in any of the following ways: +.sp + (?<p1>(?i)rah)\es+\ek<p1> + (?'p1'(?i)rah)\es+\ek{p1} + (?P<p1>(?i)rah)\es+(?P=p1) + (?<p1>(?i)rah)\es+\eg{p1} +.sp +A capture group that is referenced by name may appear in the pattern before or +after the reference. +.P +There may be more than one backreference to the same group. If a group has not +actually been used in a particular match, backreferences to it always fail by +default. For example, the pattern +.sp + (a|(bc))\e2 +.sp +always fails if it starts to match "a" rather than "bc". However, if the +PCRE2_MATCH_UNSET_BACKREF option is set at compile time, a backreference to an +unset value matches an empty string. +.P +Because there may be many capture groups in a pattern, all digits following a +backslash are taken as part of a potential backreference number. If the pattern +continues with a digit character, some delimiter must be used to terminate the +backreference. If the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, this +can be white space. Otherwise, the \eg{} syntax or an empty comment (see +.\" HTML +.\" +"Comments" +.\" +below) can be used. +. +.
+.SS "Recursive backreferences" +.rs +.sp +A backreference that occurs inside the group to which it refers fails when the +group is first used, so, for example, (a\e1) never matches. However, such +references can be useful inside repeated groups. For example, the pattern +.sp + (a|b\e1)+ +.sp +matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of +the group, the backreference matches the character string corresponding to the +previous iteration. In order for this to work, the pattern must be such that +the first iteration does not need to match the backreference. This can be done +using alternation, as in the example above, or by a quantifier with a minimum +of zero. +.P +For versions of PCRE2 less than 10.25, backreferences of this type used to +cause the group that they reference to be treated as an +.\" HTML +.\" +atomic group. +.\" +This restriction no longer applies, and backtracking into such groups can occur +as normal. +. +. +.\" HTML +.SH ASSERTIONS +.rs +.sp +An assertion is a test that does not consume any characters. The test must +succeed for the match to continue. The simple assertions coded as \eb, \eB, +\eA, \eG, \eZ, \ez, ^ and $ are described +.\" HTML +.\" +above. +.\" +.P +More complicated assertions are coded as parenthesized groups. If matching such +a group succeeds, matching continues after it, but with the matching position +in the subject string reset to what it was before the assertion was processed. +.P +A special kind of assertion, called a "scan substring" assertion, matches a +subpattern against a previously captured substring. This is described in the +section entitled +.\" HTML +.\" +"Scan substring assertions" +.\" +below. It is a PCRE2 extension, not compatible with Perl. 
+.P +The other group-based assertions are of two kinds: those that look ahead of the +current position in the subject string, and those that look behind it, and in +each case an assertion may be positive (must match for the assertion to be +true) or negative (must not match for the assertion to be true). +.P +The Perl-compatible lookaround assertions are atomic. If an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic assertions can be +useful. PCRE2 has some support for these, described in the section entitled +.\" HTML +.\" +"Non-atomic assertions" +.\" +below, but they are not Perl-compatible. +.P +A lookaround assertion may appear as the condition in a +.\" HTML +.\" +conditional group +.\" +(see below). In this case, the result of matching the assertion determines +which branch of the condition is followed. +.P +Assertion groups are not capture groups. If an assertion contains capture +groups within it, these are counted for the purposes of numbering the capture +groups in the whole pattern. Within each branch of an assertion, locally +captured substrings may be referenced in the usual way. For example, a sequence +such as (.)\eg{-1} can be used to check that two adjacent characters are the +same. +.P +When a branch within an assertion fails to match, any substrings that were +captured are discarded (as happens with any pattern branch that fails to +match). A negative assertion is true only when all its branches fail to match; +this means that no captured substrings are ever retained after a successful +negative assertion. When an assertion contains a matching branch, what happens +depends on the type of assertion. +.P +For a positive assertion, internally captured substrings in the successful +branch are retained, and matching continues with the next pattern item after +the assertion. 
For a negative assertion, a matching branch means that the +assertion is not true. If such an assertion is being used as a condition in a +.\" HTML +.\" +conditional group +.\" +(see below), captured substrings are retained, because matching continues with +the "no" branch of the condition. For other failing negative assertions, +control passes to the previous backtracking point, thus discarding any captured +strings within the assertion. +.P +Most assertion groups may be repeated; though it makes no sense to assert the +same thing several times, the side effect of capturing in positive assertions +may occasionally be useful. However, an assertion that forms the condition for +a conditional group may not be quantified. PCRE2 used to restrict the +repetition of assertions, but from release 10.35 the only restriction is that +an unlimited maximum repetition is changed to be one more than the minimum. For +example, {3,} is treated as {3,4}. +. +. +.SS "Alphabetic assertion names" +.rs +.sp +Traditionally, symbolic sequences such as (?= and (?<= have been used to +specify lookaround assertions. Perl 5.28 introduced some experimental +alphabetic alternatives which might be easier to remember. They all start with +(* instead of (? and must be written using lower case letters. PCRE2 supports +the following synonyms: +.sp + (*positive_lookahead: or (*pla: is the same as (?= + (*negative_lookahead: or (*nla: is the same as (?! + (*positive_lookbehind: or (*plb: is the same as (?<= + (*negative_lookbehind: or (*nlb: is the same as (? +.SS "Lookbehind assertions" +.rs +.sp +Lookbehind assertions start with (?<= for positive assertions and (? +.\" +(see above) +.\" +can be used instead of a lookbehind assertion at the start of a pattern to get +round the length limit restriction. 
+.P +In UTF-8 and UTF-16 modes, PCRE2 does not allow the \eC escape (which matches a +single code unit even in a UTF mode) to appear in lookbehind assertions, +because it makes it impossible to calculate the length of the lookbehind. The +\eX and \eR escapes, which can match different numbers of code units, are never +permitted in lookbehinds. +.P +.\" HTML +.\" +"Subroutine" +.\" +calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long +as the called capture group matches a limited-length string. However, +.\" HTML +.\" +recursion, +.\" +that is, a "subroutine" call into a group that is already active, +is not supported. +.P +PCRE2 supports backreferences in lookbehinds, but only if certain conditions +are met. The PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no +use of (?| in the pattern (it creates duplicate group numbers), and if the +backreference is by name, the name must be unique. Of course, the referenced +group must itself match a limited length substring. The following pattern +matches words containing at least two characters that begin and end with the +same character: +.sp + \eb(\ew)\ew++(?<=\e1) +.P +Possessive quantifiers can be used in conjunction with lookbehind assertions to +specify efficient matching at the end of subject strings. Consider a simple +pattern such as +.sp + abcd$ +.sp +when applied to a long string that does not match. Because matching proceeds +from left to right, PCRE2 will look for each "a" in the subject and then see if +what follows matches the rest of the pattern. If the pattern is specified as +.sp + ^.*abcd$ +.sp +the initial .* matches the entire string at first, but when this fails (because +there is no following "a"), it backtracks to match all but the last character, +then all but the last two characters, and so on. Once again the search for "a" +covers the entire string, from right to left, so we are no better off. 
However, +if the pattern is written as +.sp + ^.*+(?<=abcd) +.sp +there can be no backtracking for the .*+ item because of the possessive +quantifier; it can match only the entire string. The subsequent lookbehind +assertion does a single test on the last four characters. If it fails, the +match fails immediately. For long strings, this approach makes a significant +difference to the processing time. +. +. +.SS "Using multiple assertions" +.rs +.sp +Several assertions (of any sort) may occur in succession. For example, +.sp + (?<=\ed{3})(? +.SH "NON-ATOMIC ASSERTIONS" +.rs +.sp +Traditional lookaround assertions are atomic. That is, if an assertion is true, +but there is a subsequent matching failure, there is no backtracking into the +assertion. However, there are some cases where non-atomic positive assertions +can be useful. PCRE2 provides these using the following syntax: +.sp + (*non_atomic_positive_lookahead: or (*napla: or (?* + (*non_atomic_positive_lookbehind: or (*naplb: or (?<* +.sp +Consider the problem of finding the right-most word in a string that also +appears earlier in the string, that is, it must appear at least twice in total. +This pattern returns the required result as captured substring 1: +.sp + ^(?x)(*napla: .* \eb(\ew++)) (?> .*? \eb\e1\eb ){2} +.sp +For a subject such as "word1 word2 word3 word2 word3 word4" the result is +"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the +"x" option, which causes white space (introduced for readability) to be +ignored. Inside the assertion, the greedy .* at first consumes the entire +string, but then has to backtrack until the rest of the assertion can match a +word, which is captured by group 1. In other words, when the assertion first +succeeds, it captures the right-most word in the string. +.P +The current matching point is then reset to the start of the subject, and the +rest of the pattern match checks for two occurrences of the captured word, +using an ungreedy .*? 
to scan from the left. If this succeeds, we are done, but +if the last word in the string does not occur twice, this part of the pattern +fails. If a traditional atomic lookahead (?= or (*pla: had been used, the +assertion could not be re-entered, and the whole match would fail. The pattern +would succeed only if the very last word in the subject was found twice. +.P +Using a non-atomic lookahead, however, means that when the last word does not +occur twice in the string, the lookahead can backtrack and find the second-last +word, and so on, until either the match succeeds, or all words have been +tested. +.P +Two conditions must be met for a non-atomic assertion to be useful: the +contents of one or more capturing groups must change after a backtrack into the +assertion, and there must be a backreference to a changed group later in the +pattern. If this is not the case, the rest of the pattern match fails exactly +as before because nothing has changed, so using a non-atomic assertion just +wastes resources. +.P +There is one exception to backtracking into a non-atomic assertion. If an +(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That +is, a subsequent match failure cannot backtrack into the assertion. +.P +Non-atomic assertions are not supported by the alternative matching function +\fBpcre2_dfa_match()\fP. They are supported by JIT, but only if they do not +contain any control verbs such as (*ACCEPT). (This may change in future). Note +that assertions that appear as conditions for +.\" HTML +.\" +conditional groups +.\" +(see below) must be atomic. +. +. +.\" HTML +.SH "SCAN SUBSTRING ASSERTIONS" +.rs +.sp +A special kind of assertion, not compatible with Perl, makes it possible to +check the contents of a captured substring by matching it with a subpattern. +Because this involves capturing, this feature is not supported by +\fBpcre2_dfa_match()\fP. 
+.P +A scan substring assertion starts with the sequence (*scan_substring: or +(*scs: which is followed by a list of substring numbers (absolute or relative) +and/or substring names enclosed in single quotes or angle brackets, all within +parentheses. The rest of the item is the subpattern that is applied to the +substring, as shown in these examples: +.sp + (*scan_substring:(1)...) + (*scs:(-2)...) + (*scs:('AB')...) + (*scs:(1,'AB',-2)...) +.sp +The list of groups is checked in the order they are given, and it is the +contents of the first one that is found to be set that are scanned. When +PCRE2_DUPNAMES is set and there are ambiguous group names, all groups with the +same name are checked in numerical order. A scan substring assertion fails if +none of the groups it references have been set. +.P +The pattern match on the substring is always anchored, that is, it must match +from the start of the substring. There is no "bumpalong" if it does not match +at the start. The end of the subject is temporarily reset to be the end of the +substring, so \eZ, \ez, and $ will match there. However, the start of the +subject is \fInot\fP reset. This means that ^ matches only if the substring is +actually at the start of the main subject, but it also means that lookbehind +assertions into what precedes the substring are possible. +.P +Here is a very simple example: find a word that contains the rare (in English) +sequence of letters "rh" not at the start: +.sp + \eb(\ew++)(*scs:(1).+rh) +.sp +The first group captures a word which is then scanned by the second group. +This example does not actually need this heavyweight feature; the same match +can be achieved with: +.sp + \eb\ew+?rh\ew*\eb +.sp +When things are more complicated, however, scanning a captured substring can be +a useful way to describe the required match. 
For example, there is a rather +complicated pattern in the PCRE2 test data that checks an entire subject string +for a palindrome, that is, the sequence of letters is the same in both +directions. Suppose you want to search for individual words of two or more +characters such as "level" that are palindromes: +.sp + (\eb\ew{2,}+\eb)(*scs:(1)...palindrome-matching-pattern...) +.sp +Within a substring scanning subpattern, references to other groups work as +normal. Capturing groups may appear, and will retain their values during +ongoing matching if the assertion succeeds. +. +. +.SH "SCRIPT RUNS" +.rs +.sp +In concept, a script run is a sequence of characters that are all from the same +Unicode script such as Latin or Greek. However, because some scripts are +commonly used together, and because some diacritical and other marks are used +with multiple scripts, it is not that simple. There is a full description of +the rules that PCRE2 uses in the section entitled +.\" HTML +.\" +"Script Runs" +.\" +in the +.\" HREF +\fBpcre2unicode\fP +.\" +documentation. +.P +If part of a pattern is enclosed between (*script_run: or (*sr: and a closing +parenthesis, it fails if the sequence of characters that it matches are not a +script run. After a failure, normal backtracking occurs. Script runs can be +used to detect spoofing attacks using characters that look the same, but are +from different scripts. The string "paypal.com" is an infamous example, where +the letters could be a mixture of Latin and Cyrillic. This pattern ensures that +the matched characters in a sequence of non-spaces that follow white space are +a script run: +.sp + \es+(*sr:\eS+) +.sp +To be sure that they are all from the Latin script (for example), a lookahead +can be used: +.sp + \es+(?=\ep{Latin})(*sr:\eS+) +.sp +This works as long as the first character is expected to be a character in that +script, and not (for example) punctuation, which is allowed with any script. 
If +this is not the case, a more creative lookahead is needed. For example, if +digits, underscore, and dots are permitted at the start: +.sp + \es+(?=[0-9_.]*\ep{Latin})(*sr:\eS+) +.sp +.P +In many cases, backtracking into a script run pattern fragment is not +desirable. The script run can employ an atomic group to prevent this. Because +this is a common requirement, a shorthand notation is provided by +(*atomic_script_run: or (*asr: +.sp + (*asr:...) is the same as (*sr:(?>...)) +.sp +Note that the atomic group is inside the script run. Putting it outside would +not prevent backtracking into the script run pattern. +.P +Support for script runs is not available if PCRE2 is compiled without Unicode +support. A compile-time error is given if any of the above constructs is +encountered. Script runs are not supported by the alternate matching function, +\fBpcre2_dfa_match()\fP because they use the same mechanism as capturing +parentheses. +.P +\fBWarning:\fP The (*ACCEPT) control verb +.\" HTML +.\" +(see below) +.\" +should not be used within a script run group, because it causes an immediate +exit from the group, bypassing the script run checking. +. +. +.\" HTML +.SH "CONDITIONAL GROUPS" +.rs +.sp +It is possible to cause the matching process to obey a pattern fragment +conditionally or to choose between two alternative fragments, depending on +the result of an assertion, or whether a specific capture group has +already been matched. The two possible forms of conditional group are: +.sp + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) +.sp +If the condition is satisfied, the yes-pattern is used; otherwise the +no-pattern (if present) is used. An absent no-pattern is equivalent to an empty +string (it always matches). If there are more than two alternatives in the +group, a compile-time error occurs. 
Each of the two alternatives may itself +contain nested groups of any form, including conditional groups; the +restriction to two alternatives applies only at the level of the condition +itself. This pattern fragment is an example where the alternatives are complex: +.sp + (?(1) (A|B|C) | (D | (?(2)E|F) | E) ) +.sp +.P +There are five kinds of condition: references to capture groups, references to +recursion, two pseudo-conditions called DEFINE and VERSION, and assertions. +. +. +.SS "Checking for a used capture group by number" +.rs +.sp +If the text between the parentheses consists of a sequence of digits, the +condition is true if a capture group of that number has previously matched. If +there is more than one capture group with the same number (see the earlier +.\" +.\" HTML +.\" +section about duplicate group numbers), +.\" +the condition is true if any of them have matched. An alternative notation, +which is a PCRE2 extension, not supported by Perl, is to precede the digits +with a plus or minus sign. In this case, the group number is relative rather +than absolute. The most recently opened capture group (which could be enclosing +this condition) can be referenced by (?(-1), the next most recent by (?(-2), +and so on. Inside loops it can also make sense to refer to subsequent groups. +The next capture group to be opened can be referenced as (?(+1), and so on. The +value zero in any of these forms is not used; it provokes a compile-time error. +.P +Consider the following pattern, which contains non-significant white space to +make it more readable (assume the PCRE2_EXTENDED option) and to divide it into +three parts for ease of discussion: +.sp + ( \e( )? [^()]+ (?(1) \e) ) +.sp +The first part matches an optional opening parenthesis, and if that +character is present, sets it as the first captured substring. The second part +matches one or more characters that are not parentheses. 
The third part is a +conditional group that tests whether or not the first capture group +matched. If it did, that is, if the subject started with an opening parenthesis, +the condition is true, and so the yes-pattern is executed and a closing +parenthesis is required. Otherwise, since no-pattern is not present, the +conditional group matches nothing. In other words, this pattern matches a +sequence of non-parentheses, optionally enclosed in parentheses. +.P +If you were embedding this pattern in a larger one, you could use a relative +reference: +.sp + ...other stuff... ( \e( )? [^()]+ (?(-1) \e) ) ... +.sp +This makes the fragment independent of the parentheses in the larger pattern. +. +. +.SS "Checking for a used capture group by name" +.rs +.sp +Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used +capture group by name. For compatibility with earlier versions of PCRE1, which +had this facility before Perl, the syntax (?(name)...) is also recognized. +Note, however, that undelimited names consisting of the letter R followed by +digits are ambiguous (see the following section). Rewriting the above example +to use a named group gives this: +.sp + (?<OPEN> \e( )? [^()]+ (?(<OPEN>) \e) ) +.sp +If the name used in a condition of this kind is a duplicate, the test is +applied to all groups of the same name, and is true if any one of them has +matched. +. +. +.SS "Checking for pattern recursion" +.rs +.sp +"Recursion" in this sense refers to any subroutine-like call from one part of +the pattern to another, whether or not it is actually recursive. See the +sections entitled +.\" HTML +.\" +"Recursive patterns" +.\" +and +.\" HTML +.\" +"Groups as subroutines" +.\" +below for details of recursion and subroutine calls. +.P +If a condition is the string (R), and there is no capture group with the name +R, the condition is true if matching is currently in a recursion or subroutine +call to the whole pattern or any capture group. 
If digits follow the letter R, +and there is no group with that name, the condition is true if the most recent +call is into a group with the given number, which must exist somewhere in the +overall pattern. This is a contrived example that is equivalent to a+b: +.sp + ((?(R1)a+|(?1)b)) +.sp +However, in both cases, if there is a capture group with a matching name, the +condition tests for its being set, as described in the section above, instead +of testing for recursion. For example, creating a group with the name R1 by +adding (?<R1>) to the above pattern completely changes its meaning. +.P +If a name preceded by ampersand follows the letter R, for example: +.sp + (?(R&name)...) +.sp +the condition is true if the most recent recursion is into a group of that name +(which must exist within the pattern). +.P +This condition does not check the entire recursion stack. It tests only the +current level. If the name used in a condition of this kind is a duplicate, the +test is applied to all groups of the same name, and is true if any one of +them is the most recent recursion. +.P +At "top level", all these recursion test conditions are false. +. +. +.\" HTML +.SS "Defining capture groups for use by reference only" +.rs +.sp +If the condition is the string (DEFINE), the condition is always false, even if +there is a group with the name DEFINE. In this case, there may be only one +alternative in the rest of the conditional group. It is always skipped if +control reaches this point in the pattern; the idea of DEFINE is that it can be +used to define subroutines that can be referenced from elsewhere. (The use of +.\" HTML +.\" +subroutines +.\" +is described below.) For example, a pattern to match an IPv4 address such as +"192.168.23.245" could be written like this (ignore white space and line +breaks): +.sp + (?(DEFINE) (?<byte>
2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) ) + \eb (?&byte) (\e.(?&byte)){3} \eb +.sp +The first part of the pattern is a DEFINE group inside which another group +named "byte" is defined. This matches an individual component of an IPv4 +address (a number less than 256). When matching takes place, this part of the +pattern is skipped because DEFINE acts like a false condition. The rest of the +pattern uses references to the named group to match the four dot-separated +components of an IPv4 address, insisting on a word boundary at each end. +. +. +.SS "Checking the PCRE2 version" +.rs +.sp +Programs that link with a PCRE2 library can check the version by calling +\fBpcre2_config()\fP with appropriate arguments. Users of applications that do +not have access to the underlying code cannot do this. A special "condition" +called VERSION exists to allow such users to discover which version of PCRE2 +they are dealing with by using this condition to match a string such as +"yesno". VERSION must be followed either by "=" or ">=" and a version number. +For example: +.sp + (?(VERSION>=10.4)yes|no) +.sp +This pattern matches "yes" if the PCRE2 version is greater or equal to 10.4, or +"no" otherwise. The fractional part of the version number may not contain more +than two digits. +. +. +.SS "Assertion conditions" +.rs +.sp +If the condition is not in any of the above formats, it must be a parenthesized +assertion. This may be a positive or negative lookahead or lookbehind +assertion. However, it must be a traditional atomic assertion, not one of the +.\" HTML +.\" +non-atomic assertions. +.\" +.P +Consider this pattern, again containing non-significant white space, and with +the two alternatives on the second line: +.sp + (?(?=[^a-z]*[a-z]) + \ed{2}-[a-z]{3}-\ed{2} | \ed{2}-\ed{2}-\ed{2} ) +.sp +The condition is a positive lookahead assertion that matches an optional +sequence of non-letters followed by a letter. 
In other words, it tests for the +presence of at least one letter in the subject. If a letter is found, the +subject is matched against the first alternative; otherwise it is matched +against the second. This pattern matches strings in one of the two forms +dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits. +.P +When an assertion that is a condition contains capture groups, any +capturing that occurs in a matching branch is retained afterwards, for both +positive and negative assertions, because matching always continues after the +assertion, whether it succeeds or fails. (Compare non-conditional assertions, +for which captures are retained only for positive assertions that succeed.) +. +. +.\" HTML +.SH COMMENTS +.rs +.sp +There are two ways of including comments in patterns that are processed by +PCRE2. In both cases, the start of the comment must not be in a character +class, nor in the middle of any other sequence of related characters such as +(?: or a group name or number or a Unicode property name. The characters that +make up a comment play no part in the pattern matching. +.P +The sequence (?# marks the start of a comment that continues up to the next +closing parenthesis. Nested parentheses are not permitted. If the +PCRE2_EXTENDED or PCRE2_EXTENDED_MORE option is set, an unescaped # character +also introduces a comment, which in this case continues to immediately after +the next newline character or character sequence in the pattern. Which +characters are interpreted as newlines is controlled by an option passed to the +compiling function or by a special sequence at the start of the pattern, as +described in the section entitled +.\" HTML +.\" +"Newline conventions" +.\" +above. Note that the end of this type of comment is a literal newline sequence +in the pattern; escape sequences that happen to represent a newline do not +count. 
For example, consider this pattern when PCRE2_EXTENDED is set, and the +default newline convention (a single linefeed character) is in force: +.sp + abc #comment \en still comment +.sp +On encountering the # character, \fBpcre2_compile()\fP skips along, looking for +a newline in the pattern. The sequence \en is still literal at this stage, so +it does not terminate the comment. Only an actual character with the code value +0x0a (the default newline) does so. +. +. +.\" HTML +.SH "RECURSIVE PATTERNS" +.rs +.sp +Consider the problem of matching a string in parentheses, allowing for +unlimited nested parentheses. Without the use of recursion, the best that can +be done is to use a pattern that matches up to some fixed depth of nesting. It +is not possible to handle an arbitrary nesting depth. +.P +For some time, Perl has provided a facility that allows regular expressions to +recurse (amongst other things). It does this by interpolating Perl code in the +expression at run time, and the code can refer to the expression itself. A Perl +pattern using code interpolation to solve the parentheses problem can be +created like this: +.sp + $re = qr{\e( (?: (?>[^()]+) | (?p{$re}) )* \e)}x; +.sp +The (?p{...}) item interpolates Perl code at run time, and in this case refers +recursively to the pattern in which it appears. +.P +Obviously, PCRE2 cannot support the interpolation of Perl code. Instead, it +supports special syntax for recursion of the entire pattern, and also for +individual capture group recursion. After its introduction in PCRE1 and Python, +this kind of recursion was subsequently introduced into Perl at release 5.10. +.P +A special item that consists of (? followed by a number greater than zero and a +closing parenthesis is a recursive subroutine call of the capture group of the +given number, provided that it occurs inside that group. (If not, it is a +.\" HTML +.\" +non-recursive subroutine +.\" +call, which is described in the next section.) 
The special item (?R) or (?0) is +a recursive call of the entire regular expression. +.P +This PCRE2 pattern solves the nested parentheses problem (assume the +PCRE2_EXTENDED option is set so that white space is ignored): +.sp + \e( ( [^()]++ | (?R) )* \e) +.sp +First it matches an opening parenthesis. Then it matches any number of +substrings which can either be a sequence of non-parentheses, or a recursive +match of the pattern itself (that is, a correctly parenthesized substring). +Finally there is a closing parenthesis. Note the use of a possessive quantifier +to avoid backtracking into sequences of non-parentheses. +.P +If this were part of a larger pattern, you would not want to recurse the entire +pattern, so instead you could use this: +.sp + ( \e( ( [^()]++ | (?1) )* \e) ) +.sp +We have put the pattern into parentheses, and caused the recursion to refer to +them instead of the whole pattern. +.P +In a larger pattern, keeping track of parenthesis numbers can be tricky. This +is made easier by the use of relative references. Instead of (?1) in the +pattern above you can write (?-2) to refer to the second most recently opened +parentheses preceding the recursion. In other words, a negative number counts +capturing parentheses leftwards from the point at which it is encountered. +.P +Be aware however, that if +.\" HTML +.\" +duplicate capture group numbers +.\" +are in use, relative references refer to the earliest group with the +appropriate number. Consider, for example: +.sp + (?|(a)|(b)) (c) (?-2) +.sp +The first two capture groups (a) and (b) are both numbered 1, and group (c) +is number 2. When the reference (?-2) is encountered, the second most recently +opened parentheses has the number 1, but it is the first such group (the (a) +group) to which the recursion refers. This would be the same if an absolute +reference (?1) was used. In other words, relative references are just a +shorthand for computing a group number. 
+.P +It is also possible to refer to subsequent capture groups, by writing +references such as (?+2). However, these cannot be recursive because the +reference is not inside the parentheses that are referenced. They are always +.\" HTML +.\" +non-recursive subroutine +.\" +calls, as described in the next section. +.P +An alternative approach is to use named parentheses. The Perl syntax for this +is (?&name); PCRE1's earlier syntax (?P>name) is also supported. We could +rewrite the above example as follows: +.sp + (?<pn> \e( ( [^()]++ | (?&pn) )* \e) ) +.sp +If there is more than one group with the same name, the earliest one is +used. +.P +The example pattern that we have been looking at contains nested unlimited +repeats, and so the use of a possessive quantifier for matching strings of +non-parentheses is important when applying the pattern to strings that do not +match. For example, when this pattern is applied to +.sp + (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() +.sp +it yields "no match" quickly. However, if a possessive quantifier is not used, +the match runs for a very long time indeed because there are so many different +ways the + and * repeats can carve up the subject, and all have to be tested +before failure can be reported. +.P +At the end of a match, the values of capturing parentheses are those from +the outermost level. If you want to obtain intermediate values, a callout +function can be used (see below and the +.\" HREF +\fBpcre2callout\fP +.\" +documentation). If the pattern above is matched against +.sp + (ab(cd)ef) +.sp +the value for the inner capturing parentheses (numbered 2) is "ef", which is +the last value taken on at the top level. If a capture group is not matched at +the top level, its final captured value is unset, even if it was (temporarily) +set at a deeper level during the matching process. +.P +Do not confuse the (?R) item with the condition (R), which tests for recursion. 
+Consider this pattern, which matches text in angle brackets, allowing for +arbitrary nesting. Only digits are allowed in nested brackets (that is, when +recursing), whereas any characters are permitted at the outer level. +.sp + < (?: (?(R) \ed++ | [^<>]*+) | (?R)) * > +.sp +In this pattern, (?(R) is the start of a conditional group, with two different +alternatives for the recursive and non-recursive cases. The (?R) item is the +actual recursive call. +. +. +.\" HTML +.SS "Differences in recursion processing between PCRE2 and Perl" +.rs +.sp +Some former differences between PCRE2 and Perl no longer exist. +.P +Before release 10.30, recursion processing in PCRE2 differed from Perl in that +a recursive subroutine call was always treated as an atomic group. That is, +once it had matched some of the subject string, it was never re-entered, even +if it contained untried alternatives and there was a subsequent matching +failure. (Historical note: PCRE implemented recursion before Perl did.) +.P +Starting with release 10.30, recursive subroutine calls are no longer treated +as atomic. That is, they can be re-entered to try unused alternatives if there +is a matching failure later in the pattern. This is now compatible with the way +Perl works. If you want a subroutine call to be atomic, you must explicitly +enclose it in an atomic group. +.P +Supporting backtracking into recursions simplifies certain types of recursive +pattern. For example, this pattern matches palindromic strings: +.sp + ^((.)(?1)\e2|.?)$ +.sp +The second branch in the group matches a single central character in the +palindrome when there are an odd number of characters, or nothing when there +are an even number of characters, but in order to work it has to be able to try +the second case when the rest of the pattern match fails. 
If you want to match +typical palindromic phrases, the pattern has to ignore all non-word characters, +which can be done like this: +.sp + ^\eW*+((.)\eW*+(?1)\eW*+\e2|\eW*+.?)\eW*+$ +.sp +If run with the PCRE2_CASELESS option, this pattern matches phrases such as "A +man, a plan, a canal: Panama!". Note the use of the possessive quantifier *+ to +avoid backtracking into sequences of non-word characters. Without this, PCRE2 +takes a great deal longer (ten times or more) to match typical phrases, and +Perl takes so long that you think it has gone into a loop. +.P +Another way in which PCRE2 and Perl used to differ in their recursion +processing is in the handling of captured values. Formerly in Perl, when a +group was called recursively or as a subroutine (see the next section), it +had no access to any values that were captured outside the recursion, whereas +in PCRE2 these values can be referenced. Consider this pattern: +.sp + ^(.)(\e1|a(?2)) +.sp +This pattern matches "bab". The first capturing parentheses match "b", then in +the second group, when the backreference \e1 fails to match "b", the second +alternative matches "a" and then recurses. In the recursion, \e1 does now match +"b" and so the whole match succeeds. This match used to fail in Perl, but in +later versions (I tried 5.024) it now works. +. +. +.\" HTML +.SH "GROUPS AS SUBROUTINES" +.rs +.sp +If the syntax for a recursive group call (either by number or by name) is used +outside the parentheses to which it refers, it operates a bit like a subroutine +in a programming language. More accurately, PCRE2 treats the referenced group +as an independent subpattern which it tries to match at the current matching +position. The called group may be defined before or after the reference. A +numbered reference can be absolute or relative, as in these examples: +.sp + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... 
+.sp +An earlier example pointed out that the pattern +.sp + (sens|respons)e and \e1ibility +.sp +matches "sense and sensibility" and "response and responsibility", but not +"sense and responsibility". If instead the pattern +.sp + (sens|respons)e and (?1)ibility +.sp +is used, it does match "sense and responsibility" as well as the other two +strings. Another example is given in the discussion of DEFINE above. +.P +Like recursions, subroutine calls used to be treated as atomic, but this +changed at PCRE2 release 10.30, so backtracking into subroutine calls can now +occur. However, any capturing parentheses that are set during the subroutine +call revert to their previous values afterwards. +.P +Processing options such as case-independence are fixed when a group is +defined, so if it is used as a subroutine, such options cannot be changed for +different calls. For example, consider this pattern: +.sp + (abc)(?i:(?-1)) +.sp +It matches "abcabc". It does not match "abcABC" because the change of +processing option does not affect the called group. +.P +The behaviour of +.\" HTML +.\" +backtracking control verbs +.\" +in groups when called as subroutines is described in the section entitled +.\" HTML +.\" +"Backtracking verbs in subroutines" +.\" +below. +. +. +.\" HTML +.SH "ONIGURUMA SUBROUTINE SYNTAX" +.rs +.sp +For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or +a number enclosed either in angle brackets or single quotes, is an alternative +syntax for calling a group as a subroutine, possibly recursively. Here are two +of the examples used above, rewritten using this syntax: +.sp + (?<pn> \e( ( (?>[^()]+) | \eg<pn> )* \e) ) + (sens|respons)e and \eg'1'ibility +.sp +PCRE2 supports an extension to Oniguruma: if a number is preceded by a +plus or a minus sign it is taken as a relative reference. For example: +.sp + (abc)(?i:\eg<-1>) +.sp +Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP +synonymous. 
The former is a backreference; the latter is a subroutine call. +. +. +.SH CALLOUTS +.rs +.sp +Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl +code to be obeyed in the middle of matching a regular expression. This makes it +possible, amongst other things, to extract different substrings that match the +same pair of parentheses when there is a repetition. +.P +PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl +code. The feature is called "callout". The caller of PCRE2 provides an external +function by putting its entry point in a match context using the function +\fBpcre2_set_callout()\fP, and then passing that context to \fBpcre2_match()\fP +or \fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout +entry point is set to NULL, callout points will be passed over silently during +matching. To disallow callouts in the pattern syntax, you may use the +PCRE2_EXTRA_NEVER_CALLOUT option. +.P +Within a regular expression, (?C) indicates a point at which the external +function is to be called. There are two kinds of callout: those with a +numerical argument and those with a string argument. (?C) on its own with no +argument is treated as (?C0). A numerical argument allows the application to +distinguish between different callouts. String arguments were added for release +10.20 to make it possible for script languages that use PCRE2 to embed short +scripts within patterns in a similar way to Perl. +.P +During matching, when PCRE2 reaches a callout point, the external function is +called. It is provided with the number or string argument of the callout, the +position in the pattern, and one item of data that is also set in the match +block. The callout function may cause matching to proceed, to backtrack, or to +fail. +.P +By default, PCRE2 implements a number of optimizations at matching time, and +one side-effect is that sometimes callouts are skipped. 
If you need all +possible callouts to happen, you need to set options that disable the relevant +optimizations. More details, including a complete description of the +programming interface to the callout function, are given in the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +. +. +.SS "Callouts with numerical arguments" +.rs +.sp +If you just want to have a means of identifying different callout points, put a +number less than 256 after the letter C. For example, this pattern has two +callout points: +.sp + (?C1)abc(?C2)def +.sp +If the PCRE2_AUTO_CALLOUT flag is passed to \fBpcre2_compile()\fP, numerical +callouts are automatically installed before each item in the pattern. They are +all numbered 255. If there is a conditional group in the pattern whose +condition is an assertion, an additional callout is inserted just before the +condition. An explicit callout may also be set at this position, as in this +example: +.sp + (?(?C9)(?=a)abc|def) +.sp +Note that this applies only to assertion conditions, not to other types of +condition. +. +. +.SS "Callouts with string arguments" +.rs +.sp +A delimited string may be used instead of a number as a callout argument. The +starting delimiter must be one of ` ' " ^ % # $ { and the ending delimiter is +the same as the start, except for {, where the ending delimiter is }. If the +ending delimiter is needed within the string, it must be doubled. For +example: +.sp + (?C'ab ''c'' d')xyz(?C{any text})pqr +.sp +The doubling is removed before the string is passed to the callout function. +. +. +.\" HTML +.SH "BACKTRACKING CONTROL" +.rs +.sp +There are a number of special "Backtracking Control Verbs" (to use Perl's +terminology) that modify the behaviour of backtracking during matching. They +are generally of the form (*VERB) or (*VERB:NAME). Some verbs take either form, +and may behave differently depending on whether or not a name argument is +present. The names are not required to be unique within the pattern. 
+.P +By default, for compatibility with Perl, a name is any sequence of characters +that does not include a closing parenthesis. The name is not processed in +any way, and it is not possible to include a closing parenthesis in the name. +This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result +is no longer Perl-compatible. +.P +When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names +and only an unescaped closing parenthesis terminates the name. However, the +only backslash items that are permitted are \eQ, \eE, and sequences such as +\ex{100} that define character code points. Character type escapes such as \ed +are faulted. +.P +A closing parenthesis can be included in a name either as \e) or between \eQ +and \eE. In addition to backslash processing, if the PCRE2_EXTENDED or +PCRE2_EXTENDED_MORE option is also set, unescaped whitespace in verb names is +skipped, and #-comments are recognized, exactly as in the rest of the pattern. +PCRE2_EXTENDED and PCRE2_EXTENDED_MORE do not affect verb names unless +PCRE2_ALT_VERBNAMES is also set. +.P +The maximum length of a name is 255 in the 8-bit library and 65535 in the +16-bit and 32-bit libraries. If the name is empty, that is, if the closing +parenthesis immediately follows the colon, the effect is as if the colon were +not there. Any number of these verbs may occur in a pattern. Except for +(*ACCEPT), they may not be quantified. +.P +Since these verbs are specifically related to backtracking, most of them can be +used only when the pattern is to be matched using the traditional matching +function or JIT, because they use backtracking algorithms. With the exception +of (*FAIL), which behaves like a failing negative assertion, the backtracking +control verbs cause an error if encountered by the DFA matching function. 
+.P +The behaviour of these verbs in +.\" HTML +.\" +repeated groups, +.\" +.\" HTML +.\" +assertions, +.\" +and in +.\" HTML +.\" +capture groups called as subroutines +.\" +(whether or not recursively) is documented below. +. +. +.\" HTML +.SS "Optimizations that affect backtracking verbs" +.rs +.sp +PCRE2 contains some optimizations that are used to speed up matching by running +some checks at the start of each match attempt. For example, it may know the +minimum length of matching subject, or that a particular character must be +present. When one of these optimizations bypasses the running of a match, any +included backtracking verbs will not, of course, be processed. You can suppress +the start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option +when calling \fBpcre2_compile()\fP, by calling \fBpcre2_set_optimize()\fP with a +PCRE2_START_OPTIMIZE_OFF directive, or by starting the pattern with +(*NO_START_OPT). There is more discussion of this option in the section +entitled +.\" HTML +.\" +"Compiling a pattern" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +Experiments with Perl suggest that it too has similar optimizations, and like +PCRE2, turning them off can change the result of a match. +. +. +.\" HTML +.SS "Verbs that act immediately" +.rs +.sp +The following verbs act as soon as they are encountered. +.sp + (*ACCEPT) or (*ACCEPT:NAME) +.sp +This verb causes the match to end successfully, skipping the remainder of the +pattern. However, when it is inside a capture group that is called as a +subroutine, only that group is ended successfully. Matching then continues +at the outer level. If (*ACCEPT) is triggered in a positive assertion, the +assertion succeeds; in a negative assertion, the assertion fails. +.P +If (*ACCEPT) is inside capturing parentheses, the data so far is captured. 
For +example: +.sp + A((?:A|B(*ACCEPT)|C)D) +.sp +This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by +the outer parentheses. +.P +(*ACCEPT) is the only backtracking verb that is allowed to be quantified +because an ungreedy quantification with a minimum of zero acts only when a +backtrack happens. Consider, for example, +.sp + (A(*ACCEPT)??B)C +.sp +where A, B, and C may be complex expressions. After matching "A", the matcher +processes "BC"; if that fails, causing a backtrack, (*ACCEPT) is triggered and +the match succeeds. In both cases, all but C is captured. Whereas (*COMMIT) +(see below) means "fail on backtrack", a repeated (*ACCEPT) of this type means +"succeed on backtrack". +.P +\fBWarning:\fP (*ACCEPT) should not be used within a script run group, because +it causes an immediate exit from the group, bypassing the script run checking. +.sp + (*FAIL) or (*FAIL:NAME) +.sp +This verb causes a matching failure, forcing backtracking to occur. It may be +abbreviated to (*F). It is equivalent to (?!) but easier to read. The Perl +documentation notes that it is probably useful only when combined with (?{}) or +(??{}). Those are, of course, Perl features that are not present in PCRE2. The +nearest equivalent is the callout feature, as for example in this pattern: +.sp + a+(?C)(*FAIL) +.sp +A match with the string "aaaa" always fails, but the callout is taken before +each backtrack happens (in this example, 10 times). +.P +(*ACCEPT:NAME) and (*FAIL:NAME) behave the same as (*MARK:NAME)(*ACCEPT) and +(*MARK:NAME)(*FAIL), respectively, that is, a (*MARK) is recorded just before +the verb acts. +. +. +.SS "Recording which path was taken" +.rs +.sp +There is one verb whose main purpose is to track how a match was arrived at, +though it also has a secondary use in conjunction with advancing the match +starting point (see (*SKIP) below). +.sp + (*MARK:NAME) or (*:NAME) +.sp +A name is always required with this verb. 
For all the other backtracking +control verbs, a NAME argument is optional. +.P +When a match succeeds, the name of the last-encountered mark name on the +matching path is passed back to the caller as described in the section entitled +.\" HTML +.\" +"Other information about the match" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. This applies to all instances of (*MARK) and other verbs, +including those inside assertions and atomic groups. However, there are +differences in those cases when (*MARK) is used in conjunction with (*SKIP) as +described below. +.P +The mark name that was last encountered on the matching path is passed back. A +verb without a NAME argument is ignored for this purpose. Here is an example of +\fBpcre2test\fP output, where the "mark" modifier requests the retrieval and +outputting of (*MARK) data: +.sp + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XY + 0: XY + MK: A + XZ + 0: XZ + MK: B +.sp +The (*MARK) name is tagged with "MK:" in this output, and in this example it +indicates which of the two alternatives matched. This is a more efficient way +of obtaining this information than putting each alternative in its own +capturing parentheses. +.P +If a verb with a name is encountered in a positive assertion that is true, the +name is recorded and passed back if it is the last-encountered. This does not +happen for negative assertions or failing positive assertions. +.P +After a partial match or a failed match, the last encountered name in the +entire match process is returned. For example: +.sp + re> /X(*MARK:A)Y|X(*MARK:B)Z/mark + data> XP + No match, mark = B +.sp +Note that in this unanchored example the mark is retained from the match +attempt that started at the letter "X" in the subject. Subsequent match +attempts starting at "P" and then with an empty string do not get as far as the +(*MARK) item, but nevertheless do not reset it. 
+.P +If you are interested in (*MARK) values after failed matches, you should +probably either set the PCRE2_NO_START_OPTIMIZE option or call +\fBpcre2_set_optimize()\fP with a PCRE2_START_OPTIMIZE_OFF directive +.\" HTML +.\" +(see above) +.\" +to ensure that the match is always attempted. +. +. +.SS "Verbs that act after backtracking" +.rs +.sp +The following verbs do nothing when they are encountered. Matching continues +with what follows, but if there is a subsequent match failure, causing a +backtrack to the verb, a failure is forced. That is, backtracking cannot pass +to the left of the verb. However, when one of these verbs appears inside an +atomic group or in an atomic lookaround assertion that is true, its effect is +confined to that group, because once the group has been matched, there is never +any backtracking into it. Backtracking from beyond an atomic assertion or group +ignores the entire group, and seeks a preceding backtracking point. +.P +These verbs differ in exactly what kind of failure occurs when backtracking +reaches them. The behaviour described below is what happens when the verb is +not in a subroutine or an assertion. Subsequent sections cover these special +cases. +.sp + (*COMMIT) or (*COMMIT:NAME) +.sp +This verb causes the whole match to fail outright if there is a later matching +failure that causes backtracking to reach it. Even if the pattern is +unanchored, no further attempts to find a match by advancing the starting point +take place. If (*COMMIT) is the only backtracking verb that is encountered, +once it has been passed \fBpcre2_match()\fP is committed to finding a match at +the current starting point, or not at all. For example: +.sp + a+(*COMMIT)b +.sp +This matches "xxaab" but not "aacaab". It can be thought of as a kind of +dynamic anchor, or "I've started, so I must finish." +.P +The behaviour of (*COMMIT:NAME) is not the same as (*MARK:NAME)(*COMMIT). 
It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names that are set with +(*MARK), ignoring those set by any of the other backtracking verbs. +.P +If there is more than one backtracking verb in a pattern, a different one that +follows (*COMMIT) may be triggered first, so merely passing (*COMMIT) during a +match does not always guarantee that a match must be at this starting point. +.P +Note that (*COMMIT) at the start of a pattern is not the same as an anchor, +unless PCRE2's start-of-match optimizations are turned off, as shown in this +output from \fBpcre2test\fP: +.sp + re> /(*COMMIT)abc/ + data> xyzabc + 0: abc + data> + re> /(*COMMIT)abc/no_start_optimize + data> xyzabc + No match +.sp +For the first pattern, PCRE2 knows that any match must start with "a", so the +optimization skips along the subject to "a" before applying the pattern to the +first set of data. The match attempt then succeeds. The second pattern disables +the optimization that skips along to the first character. The pattern is now +applied starting at "x", and so the (*COMMIT) causes the match to fail without +trying any other starting points. +.sp + (*PRUNE) or (*PRUNE:NAME) +.sp +This verb causes the match to fail at the current starting position in the +subject if there is a later matching failure that causes backtracking to reach +it. If the pattern is unanchored, the normal "bumpalong" advance to the next +starting character then happens. Backtracking can occur as usual to the left of +(*PRUNE), before it is reached, or when matching to the right of (*PRUNE), but +if there is no match to the right, backtracking cannot cross (*PRUNE). In +simple cases, the use of (*PRUNE) is just an alternative to an atomic group or +possessive quantifier, but there are some uses of (*PRUNE) that cannot be +expressed in any other way. In an anchored pattern (*PRUNE) has the same effect +as (*COMMIT). 
+.P +The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +.sp + (*SKIP) +.sp +This verb, when given without a name, is like (*PRUNE), except that if the +pattern is unanchored, the "bumpalong" advance is not to the next character, +but to the position in the subject where (*SKIP) was encountered. (*SKIP) +signifies that whatever text was matched leading up to it cannot be part of a +successful match if there is a later mismatch. Consider: +.sp + a+(*SKIP)b +.sp +If the subject is "aaaac...", after the first match attempt fails (starting at +the first character in the string), the starting point skips on to start the +next attempt at "c". Note that a possessive quantifier does not have the same +effect as this example; although it would suppress backtracking during the +first match attempt, the second attempt would start at the second character +instead of skipping on to "c". +.P +If (*SKIP) is used to specify a new starting position that is the same as the +starting position of the current match, or (by being inside a lookbehind) +earlier, the position specified by (*SKIP) is ignored, and instead the normal +"bumpalong" occurs. +.sp + (*SKIP:NAME) +.sp +When (*SKIP) has an associated name, its behaviour is modified. When such a +(*SKIP) is triggered, the previous path through the pattern is searched for the +most recent (*MARK) that has the same name. If one is found, the "bumpalong" +advance is to the subject position that corresponds to that (*MARK) instead of +to where (*SKIP) was encountered. If no (*MARK) with a matching name is found, +the (*SKIP) is ignored. 
+.P +The search for a (*MARK) name uses the normal backtracking mechanism, which +means that it does not see (*MARK) settings that are inside atomic groups or +assertions, because they are never re-entered by backtracking. Compare the +following \fBpcre2test\fP examples: +.sp + re> /a(?>(*MARK:X))(*SKIP:X)(*F)|(.)/ + data: abc + 0: a + 1: a + data: + re> /a(?:(*MARK:X))(*SKIP:X)(*F)|(.)/ + data: abc + 0: b + 1: b +.sp +In the first example, the (*MARK) setting is in an atomic group, so it is not +seen when (*SKIP:X) triggers, causing the (*SKIP) to be ignored. This allows +the second branch of the pattern to be tried at the first character position. +In the second example, the (*MARK) setting is not in an atomic group. This +allows (*SKIP:X) to find the (*MARK) when it backtracks, and this causes a new +matching attempt to start at the second character. This time, the (*MARK) is +never seen because "a" does not match "b", so the matcher immediately jumps to +the second branch of the pattern. +.P +Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It ignores +names that are set by other backtracking verbs. +.sp + (*THEN) or (*THEN:NAME) +.sp +This verb causes a skip to the next innermost alternative when backtracking +reaches it. That is, it cancels any further backtracking within the current +alternative. Its name comes from the observation that it can be used for a +pattern-based if-then-else block: +.sp + ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ... +.sp +If the COND1 pattern matches, FOO is tried (and possibly further items after +the end of the group if FOO succeeds); on failure, the matcher skips to the +second alternative and tries COND2, without backtracking into COND1. If that +succeeds and BAR fails, COND3 is tried. If subsequently BAZ fails, there are no +more alternatives, so there is a backtrack to whatever came before the entire +group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). 
+.P +The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is +like (*MARK:NAME) in that the name is remembered for passing back to the +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +ignoring those set by other backtracking verbs. +.P +A group that does not contain a | character is just a part of the enclosing +alternative; it is not a nested alternation with only one alternative. The +effect of (*THEN) extends beyond such a group to the enclosing alternative. +Consider this pattern, where A, B, etc. are complex pattern fragments that do +not contain any | characters at this level: +.sp + A (B(*THEN)C) | D +.sp +If A and B are matched, but there is a failure in C, matching does not +backtrack into A; instead it moves to the next alternative, that is, D. +However, if the group containing (*THEN) is given an alternative, it +behaves differently: +.sp + A (B(*THEN)C | (*FAIL)) | D +.sp +The effect of (*THEN) is now confined to the inner group. After a failure in C, +matching moves to (*FAIL), which causes the whole group to fail because there +are no more alternatives to try. In this case, matching does backtrack into A. +.P +Note that a conditional group is not considered as having two alternatives, +because only one is ever used. In other words, the | character in a conditional +group has a different meaning. Ignoring white space, consider: +.sp + ^.*? (?(?=a) a | b(*THEN)c ) +.sp +If the subject is "ba", this pattern does not match. Because .*? is ungreedy, +it initially matches zero characters. The condition (?=a) then fails, the +character "b" is matched, but "c" is not. At this point, matching does not +backtrack to .*? as might perhaps be expected from the presence of the | +character. The conditional group is part of the single alternative that +comprises the whole pattern, and so the match fails. (If there was a backtrack +into .*?, allowing it to match "b", the match would succeed.) 
+.P +The verbs just described provide four different "strengths" of control when +subsequent matching fails. (*THEN) is the weakest, carrying on the match at the +next alternative. (*PRUNE) comes next, failing the match at the current +starting position, but allowing an advance to the next character (for an +unanchored pattern). (*SKIP) is similar, except that the advance may be more +than one character. (*COMMIT) is the strongest, causing the entire match to +fail. +. +. +.SS "More than one backtracking verb" +.rs +.sp +If more than one backtracking verb is present in a pattern, the one that is +backtracked onto first acts. For example, consider this pattern, where A, B, +etc. are complex pattern fragments: +.sp + (A(*COMMIT)B(*THEN)C|ABD) +.sp +If A matches but B fails, the backtrack to (*COMMIT) causes the entire match to +fail. However, if A and B match, but C fails, the backtrack to (*THEN) causes +the next alternative (ABD) to be tried. This behaviour is consistent, but is +not always the same as Perl's. It means that if two or more backtracking verbs +appear in succession, all but the last of them has no effect. Consider this +example: +.sp + ...(*COMMIT)(*PRUNE)... +.sp +If there is a matching failure to the right, backtracking onto (*PRUNE) causes +it to be triggered, and its action is taken. There can never be a backtrack +onto (*COMMIT). +. +. +.\" HTML +.SS "Backtracking verbs in repeated groups" +.rs +.sp +PCRE2 sometimes differs from Perl in its handling of backtracking verbs in +repeated groups. For example, consider: +.sp + /(a(*COMMIT)b)+ac/ +.sp +If the subject is "abac", Perl matches unless its optimizations are disabled, +but PCRE2 always fails because the (*COMMIT) in the second repeat of the group +acts. +. +. +.\" HTML +.SS "Backtracking verbs in assertions" +.rs +.sp +(*FAIL) in any assertion has its normal effect: it forces an immediate +backtrack. 
The behaviour of the other backtracking verbs depends on whether or +not the assertion is standalone or acting as the condition in a conditional +group. +.P +(*ACCEPT) in a standalone positive assertion causes the assertion to succeed +without any further processing; captured strings and a mark name (if set) are +retained. In a standalone negative assertion, (*ACCEPT) causes the assertion to +fail without any further processing; captured substrings and any mark name are +discarded. +.P +If the assertion is a condition, (*ACCEPT) causes the condition to be true for +a positive assertion and false for a negative one; captured substrings are +retained in both cases. +.P +The remaining verbs act only when a later failure causes a backtrack to +reach them. This means that, for the Perl-compatible assertions, their effect +is confined to the assertion, because Perl lookaround assertions are atomic. A +backtrack that occurs after such an assertion is complete does not jump back +into the assertion. Note in particular that a (*MARK) name that is set in an +assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern. +.P +PCRE2 now supports non-atomic positive assertions and also "scan substring" +assertions, as described in the sections entitled +.\" HTML +.\" +"Non-atomic assertions" +.\" +and +.\" HTML +.\" +"Scan substring assertions" +.\" +above. These assertions must be standalone (not used as conditions). They are +not Perl-compatible. For these assertions, a later backtrack does jump back +into the assertion, and therefore verbs such as (*COMMIT) can be triggered by +backtracks from later in the pattern. +.P +The effect of (*THEN) is not allowed to escape beyond an assertion. If there +are no more branches to try, (*THEN) causes a positive assertion to be false, +and a negative assertion to be true. This behaviour differs from Perl when the +assertion has only one branch. 
+.P +The other backtracking verbs are not treated specially if they appear in a +standalone positive assertion. In a conditional positive assertion, +backtracking (from within the assertion) into (*COMMIT), (*SKIP), or (*PRUNE) +causes the condition to be false. However, for both standalone and conditional +negative assertions, backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes +the assertion to be true, without considering any further alternative branches. +. +. +.\" HTML +.SS "Backtracking verbs in subroutines" +.rs +.sp +These behaviours occur whether or not the group is called recursively. +.P +(*ACCEPT) in a group called as a subroutine causes the subroutine match to +succeed without any further processing. Matching then continues after the +subroutine call. Perl documents this behaviour. Perl's treatment of the other +verbs in subroutines is different in some cases. +.P +(*FAIL) in a group called as a subroutine has its normal effect: it forces +an immediate backtrack. +.P +(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when +triggered by being backtracked to in a group called as a subroutine. There is +then a backtrack at the outer level. +.P +(*THEN), when triggered, skips to the next alternative in the innermost +enclosing group that has alternatives (its normal behaviour). However, if there +is no such group within the subroutine's group, the subroutine match fails and +there is a backtrack at the outer level. +. +. +.\" HTML +.SH "EBCDIC ENVIRONMENTS" +.rs +.sp +Differences in the way PCRE2 behaves when it is running in an EBCDIC environment +are covered in this section. +. +. +.SS "Escape sequences" +.rs +.sp +When PCRE2 is compiled in EBCDIC mode, \eN{U+hhh..} is not supported. \ea, \ee, +\ef, \en, \er, and \et generate the appropriate EBCDIC code values. The \ec +escape is processed as specified for Perl in the \fBperlebcdic\fP document. 
The +only characters that are allowed after \ec are A-Z, a-z, or one of @, [, \e, ], +^, _, or ?. Any other character provokes a compile-time error. The sequence +\ec@ encodes character code 0; after \ec the letters (in either case) encode +characters 1-26 (hex 01 to hex 1A); [, \e, ], ^, and _ encode characters 27-31 +(hex 1B to hex 1F), and \ec? becomes either 255 (hex FF) or 95 (hex 5F). +.P +Thus, apart from \ec?, these escapes generate the same character code values as +they do in an ASCII or Unicode environment, though the meanings of the values +mostly differ. For example, \ecG always generates code value 7, which is BEL in +ASCII but DEL in EBCDIC. +.P +The sequence \ec? generates DEL (127, hex 7F) in an ASCII environment, but +because 127 is not a control character in EBCDIC, Perl makes it generate the +APC character. Unfortunately, there are several variants of EBCDIC. In most of +them the APC character has the value 255 (hex FF), but in the one Perl calls +POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC +values, PCRE2 makes \ec? generate 95; otherwise it generates 255. +. +. +.SS "Character classes" +.rs +.sp +In character classes there is a special case in EBCDIC environments for ranges +whose end points are both specified as literal letters in the same case. For +compatibility with Perl, EBCDIC code points within the range that are not +letters are omitted. For example, [h-k] matches only four characters, even +though the EBCDIC codes for h and k are 0x88 and 0x92, a range of 11 code +points. However, if the range is specified numerically, for example, +[\ex88-\ex92] or [h-\ex92], all code points are included. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2api\fP(3), \fBpcre2callout\fP(3), \fBpcre2matching\fP(3), +\fBpcre2syntax\fP(3), \fBpcre2\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. 
+.SH REVISION +.rs +.sp +.nf +Last updated: 27 November 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2perform.3 b/3rd/pcre2/doc/pcre2perform.3 new file mode 100644 index 00000000..02edd38f --- /dev/null +++ b/3rd/pcre2/doc/pcre2perform.3 @@ -0,0 +1,260 @@ +.TH PCRE2PERFORM 3 "06 December 2022" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 PERFORMANCE" +.rs +.sp +Two aspects of performance are discussed below: memory usage and processing +time. The way you express your pattern as a regular expression can affect both +of them. +. +.SH "COMPILED PATTERN MEMORY USAGE" +.rs +.sp +Patterns are compiled by PCRE2 into a reasonably efficient interpretive code, +so that most simple patterns do not use much memory for storing the compiled +version. However, there is one case where the memory usage of a compiled +pattern can be unexpectedly large. If a parenthesized group has a quantifier +with a minimum greater than 1 and/or a limited maximum, the whole group is +repeated in the compiled code. For example, the pattern +.sp + (abc|def){2,4} +.sp +is compiled as if it were +.sp + (abc|def)(abc|def)((abc|def)(abc|def)?)? +.sp +(Technical aside: It is done this way so that backtrack points within each of +the repetitions can be independently maintained.) +.P +For regular expressions whose quantifiers use only small numbers, this is not +usually a problem. However, if the numbers are large, and particularly if such +repetitions are nested, the memory usage can become an embarrassment. For +example, the very simple pattern +.sp + ((ab){1,1000}c){1,3} +.sp +uses over 50KiB when compiled using the 8-bit library. When PCRE2 is +compiled with its default internal pointer size of two bytes, the size limit on +a compiled pattern is 65535 code units in the 8-bit and 16-bit libraries, and +this is reached with the above pattern if the outer repetition is increased +from 3 to 4. 
PCRE2 can be compiled to use larger internal pointers and thus +handle larger compiled patterns, but it is better to try to rewrite your +pattern to use less memory if you can. +.P +One way of reducing the memory usage for such patterns is to make use of +PCRE2's +.\" HTML +.\" +"subroutine" +.\" +facility. Re-writing the above pattern as +.sp + ((ab)(?2){0,999}c)(?1){0,2} +.sp +reduces the memory requirements to around 16KiB, and indeed it remains under +20KiB even with the outer repetition increased to 100. However, this kind of +pattern is not always exactly equivalent, because any captures within +subroutine calls are lost when the subroutine completes. If this is not a +problem, this kind of rewriting will allow you to process patterns that PCRE2 +cannot otherwise handle. The matching performance of the two different versions +of the pattern are roughly the same. (This applies from release 10.30 - things +were different in earlier releases.) +. +. +.SH "STACK AND HEAP USAGE AT RUN TIME" +.rs +.sp +From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP +uses very little system stack at run time. In earlier releases recursive +function calls could use a great deal of stack, and this could cause problems, +but this usage has been eliminated. Backtracking positions are now explicitly +remembered in memory frames controlled by the code. +.P +The size of each frame depends on the size of pointer variables and the number +of capturing parenthesized groups in the pattern being matched. On a 64-bit +system the frame size for a pattern with no captures is 128 bytes. For each +capturing group the size increases by 16 bytes. +.P +Until release 10.41, an initial 20KiB frames vector was allocated on the system +stack, but this still caused some issues for multi-thread applications where +each thread has a very small stack. From release 10.41 backtracking memory +frames are always held in heap memory. 
An initial heap allocation is obtained +the first time any match data block is passed to \fBpcre2_match()\fP. This is +remembered with the match data block and re-used if that block is used for +another match. It is freed when the match data block itself is freed. +.P +The size of the initial block is the larger of 20KiB or ten times the pattern's +frame size, unless the heap limit is less than this, in which case the heap +limit is used. If the initial block proves to be too small during matching, it +is replaced by a larger block, subject to the heap limit. The heap limit is +checked only when a new block is to be allocated. Reducing the heap limit +between calls to \fBpcre2_match()\fP with the same match data block does not +affect the saved block. +.P +In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive +function calls, but only for processing atomic groups, lookaround assertions, +and recursion within the pattern. The original version of the code used to +allocate quite large internal workspace vectors on the stack, which caused some +problems for some patterns in environments with small stacks. From release +10.32 the code for \fBpcre2_dfa_match()\fP has been re-factored to use heap +memory when necessary for internal workspace when recursing, though recursive +function calls are still used. +.P +The "match depth" parameter can be used to limit the depth of function +recursion, and the "match heap" parameter to limit heap memory in +\fBpcre2_dfa_match()\fP. +. +. +.SH "PROCESSING TIME" +.rs +.sp +Certain items in regular expression patterns are processed more efficiently +than others. It is more efficient to use a character class like [aeiou] than a +set of single-character alternatives such as (a|e|i|o|u). In general, the +simplest construction that provides the required behaviour is usually the most +efficient. 
Jeffrey Friedl's book contains a lot of useful general discussion +about optimizing regular expressions for efficient performance. This document +contains a few observations about PCRE2. +.P +Using Unicode character properties (the \ep, \eP, and \eX escapes) is slow, +because PCRE2 has to use a multi-stage table lookup whenever it needs a +character's property. If you can find an alternative pattern that does not use +character properties, it will probably be faster. +.P +By default, the escape sequences \eb, \ed, \es, and \ew, and the POSIX +character classes such as [:alpha:] do not use Unicode properties, partly for +backwards compatibility, and partly for performance reasons. However, you can +set the PCRE2_UCP option or start the pattern with (*UCP) if you want Unicode +character properties to be used. This can double the matching time for items +such as \ed, when matched with \fBpcre2_match()\fP; the performance loss is +less with a DFA matching function, and in both cases there is not much +difference for \eb. +.P +When a pattern begins with .* not in atomic parentheses, nor in parentheses +that are the subject of a backreference, and the PCRE2_DOTALL option is set, +the pattern is implicitly anchored by PCRE2, since it can match only at the +start of a subject string. If the pattern has multiple top-level branches, they +must all be anchorable. The optimization can be disabled by the +PCRE2_NO_DOTSTAR_ANCHOR option, and is automatically disabled if the pattern +contains (*PRUNE) or (*SKIP). +.P +If PCRE2_DOTALL is not set, PCRE2 cannot make this optimization, because the +dot metacharacter does not then match a newline, and if the subject string +contains newlines, the pattern may match from the character immediately +following one of them instead of from the very start. For example, the pattern +.sp + .*second +.sp +matches the subject "first\enand second" (where \en stands for a newline +character), with the match starting at the seventh character. 
In order to do +this, PCRE2 has to retry the match starting after every newline in the subject. +.P +If you are using such a pattern with subject strings that do not contain +newlines, the best performance is obtained by setting PCRE2_DOTALL, or starting +the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE2 +from having to scan along the subject looking for a newline to restart at. +.P +Beware of patterns that contain nested indefinite repeats. These can take a +long time to run when applied to a string that does not match. Consider the +pattern fragment +.sp + ^(a+)* +.sp +This can match "aaaa" in 16 different ways, and this number increases very +rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4 +times, and for each of those cases other than 0 or 4, the + repeats can match +different numbers of times.) When the remainder of the pattern is such that the +entire match is going to fail, PCRE2 has in principle to try every possible +variation, and this can take an extremely long time, even for relatively short +strings. +.P +An optimization catches some of the more simple cases such as +.sp + (a+)*b +.sp +where a literal character follows. Before embarking on the standard matching +procedure, PCRE2 checks that there is a "b" later in the subject string, and if +there is not, it fails the match immediately. However, when there is no +following literal this optimization cannot be used. You can see the difference +by comparing the behaviour of +.sp + (a+)*\ed +.sp +with the pattern above. The former gives a failure almost instantly when +applied to a whole line of "a" characters, whereas the latter takes an +appreciable time with strings longer than about 20 characters. +.P +In many cases, the solution to this kind of performance issue is to use an +atomic group or a possessive quantifier. This can often reduce memory +requirements as well. 
As another example, consider this pattern: +.sp + ([^<]|<(?!inet))+ +.sp +It matches from wherever it starts until it encounters " +.\" +"The match context" +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +The \fBpcre2test\fP test program has a modifier called "find_limits" which, if +applied to a subject line, causes it to find the smallest limits that allow a +pattern to match. This is done by repeatedly matching with different limits. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 06 December 2022 +Copyright (c) 1997-2022 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2posix.3 b/3rd/pcre2/doc/pcre2posix.3 new file mode 100644 index 00000000..c4a9272a --- /dev/null +++ b/3rd/pcre2/doc/pcre2posix.3 @@ -0,0 +1,348 @@ +.TH PCRE2POSIX 3 "27 November 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "SYNOPSIS" +.rs +.sp +.B #include +.PP +.nf +.B int pcre2_regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP, +.B " int \fIcflags\fP);" +.sp +.B int pcre2_regexec(const regex_t *\fIpreg\fP, const char *\fIstring\fP, +.B " size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);" +.sp +.B "size_t pcre2_regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP," +.B " char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);" +.sp +.B void pcre2_regfree(regex_t *\fIpreg\fP); +.fi +. +.SH DESCRIPTION +.rs +.sp +This set of functions provides a POSIX-style API for the PCRE2 regular +expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit +and 32-bit libraries. See the +.\" HREF +\fBpcre2api\fP +.\" +documentation for a description of PCRE2's native API, which contains much +additional functionality. +.P +\fBIMPORTANT NOTE\fP: The functions described here are NOT thread-safe, and +should not be used in multi-threaded applications. 
They are also limited to +processing subjects that are not bigger than 2GB. Use the native API instead. +.P +These functions are wrapper functions that ultimately call the PCRE2 native +API. Their prototypes are defined in the \fBpcre2posix.h\fP header file, and +they all have unique names starting with \fBpcre2_\fP. However, the +\fBpcre2posix.h\fP header also contains macro definitions that convert the +standard POSIX names such as \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This +means that a program can use the usual POSIX names without running the risk of +accidentally linking with POSIX functions from a different library. +.P +On Unix-like systems the PCRE2 POSIX library is called \fBlibpcre2-posix\fP, so +can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an +application. Because the POSIX functions call the native ones, it is also +necessary to add \fB-lpcre2-8\fP. +.P +On Windows systems, if you are linking to a DLL version of the library, it is +recommended that \fBPCRE2POSIX_SHARED\fP is defined before including the +\fBpcre2posix.h\fP header, as it will allow for a more efficient way to +invoke the functions by adding the \fB__declspec(dllimport)\fP decorator. +.P +Although they were not defined as prototypes in \fBpcre2posix.h\fP, releases +10.33 to 10.36 of the library contained functions with the POSIX names +\fBregcomp()\fP etc. These simply passed their arguments to the PCRE2 +functions. These functions were provided for backwards compatibility with +earlier versions of PCRE2, which had only POSIX names. However, this has proved +troublesome in situations where a program links with several libraries, some of +which use PCRE2's POSIX interface while others use the real POSIX functions. +For this reason, the POSIX names have been removed since release 10.37. +.P +Calling the header file \fBpcre2posix.h\fP avoids any conflict with other POSIX +libraries. 
It can, of course, be renamed or aliased as \fBregex.h\fP, which is +the "correct" name, if there is no clash. It provides two structure types, +\fIregex_t\fP for compiled internal forms, and \fIregmatch_t\fP for returning +captured substrings. It also defines some constants whose names start with +"REG_"; these are used for setting options and identifying error codes. +. +. +.SH "USING THE POSIX FUNCTIONS" +.rs +.sp +Note that these functions are just POSIX-style wrappers for PCRE2's native API. +They do not give POSIX regular expression behaviour, and they are not +thread-safe or even POSIX compatible. +.P +Those POSIX option bits that can reasonably be mapped to PCRE2 native options +have been implemented. In addition, the option REG_EXTENDED is defined with the +value zero. This has no effect, but since programs that are written to the +POSIX interface often use it, this makes it easier to slot in PCRE2 as a +replacement library. Other POSIX options are not even defined. +.P +There are also some options that are not defined by POSIX. These have been +added at the request of users who want to make use of certain PCRE2-specific +features via the POSIX calling interface or to add BSD or GNU functionality. +.P +When PCRE2 is called via these functions, it is only the API that is POSIX-like +in style. The syntax and semantics of the regular expressions themselves are +still those of Perl, subject to the setting of various PCRE2 options, as +described below. "POSIX-like in style" means that the API approximates to the +POSIX definition; it is not fully POSIX-compatible, and in multi-unit encoding +domains it is probably even less compatible. +.P +The descriptions below use the actual names of the functions, but, as described +above, the standard POSIX names (without the \fBpcre2_\fP prefix) may also be +used. +. +. +.SH "COMPILING A PATTERN" +.rs +.sp +The function \fBpcre2_regcomp()\fP is called to compile a pattern into an +internal form. 
By default, the pattern is a C string terminated by a binary +zero (but see REG_PEND below). The \fIpreg\fP argument is a pointer to a +\fBregex_t\fP structure that is used as a base for storing information about +the compiled regular expression. It is also used for input when REG_PEND is +set. The \fBregex_t\fP structure used by \fBpcre2_regcomp()\fP is defined in +\fBpcre2posix.h\fP and is not the same as the structure used by other libraries +that provide POSIX-style matching. +.P +The argument \fIcflags\fP is either zero, or contains one or more of the bits +defined by the following macros: +.sp + REG_DOTALL +.sp +The PCRE2_DOTALL option is set when the regular expression is passed for +compilation to the native function. Note that REG_DOTALL is not part of the +POSIX standard. +.sp + REG_ICASE +.sp +The PCRE2_CASELESS option is set when the regular expression is passed for +compilation to the native function. +.sp + REG_NEWLINE +.sp +The PCRE2_MULTILINE option is set when the regular expression is passed for +compilation to the native function. Note that this does \fInot\fP mimic the +defined POSIX behaviour for REG_NEWLINE (see the following section). +.sp + REG_NOSPEC +.sp +The PCRE2_LITERAL option is set when the regular expression is passed for +compilation to the native function. This disables all meta characters in the +pattern, causing it to be treated as a literal string. The only other options +that are allowed with REG_NOSPEC are REG_ICASE, REG_NOSUB, REG_PEND, and +REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard. +.sp + REG_NOSUB +.sp +When a pattern that is compiled with this flag is passed to +\fBpcre2_regexec()\fP for matching, the \fInmatch\fP and \fIpmatch\fP arguments +are ignored, and no captured strings are returned. Versions of the PCRE2 library +prior to 10.22 used to set the PCRE2_NO_AUTO_CAPTURE compile option, but this +no longer happens because it disables the use of backreferences. 
+.sp + REG_PEND +.sp +If this option is set, the \fBre_endp\fP field in the \fIpreg\fP structure +(which has the type const char *) must be set to point to the character beyond +the end of the pattern before calling \fBpcre2_regcomp()\fP. The pattern itself +may now contain binary zeros, which are treated as data characters. Without +REG_PEND, a binary zero terminates the pattern and the \fBre_endp\fP field is +ignored. This is a GNU extension to the POSIX standard and should be used with +caution in software intended to be portable to other systems. +.sp + REG_UCP +.sp +The PCRE2_UCP option is set when the regular expression is passed for +compilation to the native function. This causes PCRE2 to use Unicode properties +when matching \ed, \ew, etc., instead of just recognizing ASCII values. Note +that REG_UCP is not part of the POSIX standard. +.sp + REG_UNGREEDY +.sp +The PCRE2_UNGREEDY option is set when the regular expression is passed for +compilation to the native function. Note that REG_UNGREEDY is not part of the +POSIX standard. +.sp + REG_UTF +.sp +The PCRE2_UTF option is set when the regular expression is passed for +compilation to the native function. This causes the pattern itself and all data +strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF +is not part of the POSIX standard. +.P +In the absence of these flags, no options are passed to the native function. +This means that the regex is compiled with PCRE2 default semantics. In +particular, the way it handles newline characters in the subject string is the +Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only +\fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way +newlines are matched by the dot metacharacter (they are not) or by a negative +class such as [^a] (they are). +.P +The yield of \fBpcre2_regcomp()\fP is zero on success, and non-zero otherwise. 
+The \fIpreg\fP structure is filled in on success, and one other member of the +structure (as well as \fIre_endp\fP) is public: \fIre_nsub\fP contains the +number of capturing subpatterns in the regular expression. Various error codes +are defined in the header file. +.P +NOTE: If the yield of \fBpcre2_regcomp()\fP is non-zero, you must not attempt +to use the contents of the \fIpreg\fP structure. If, for example, you pass it +to \fBpcre2_regexec()\fP, the result is undefined and your program is likely to +crash. +. +. +.SH "MATCHING NEWLINE CHARACTERS" +.rs +.sp +This area is not simple, because POSIX and Perl take different views of things. +It is not possible to get PCRE2 to obey POSIX semantics, but then PCRE2 was +never intended to be a POSIX engine. The following table lists the different +possibilities for matching newline characters in Perl and PCRE2: +.sp + Default Change with +.sp + . matches newline no PCRE2_DOTALL + newline matches [^a] yes not changeable + $ matches \en at end yes PCRE2_DOLLAR_ENDONLY + $ matches \en in middle no PCRE2_MULTILINE + ^ matches \en in middle no PCRE2_MULTILINE +.sp +This is the equivalent table for a POSIX-compatible pattern matcher: +.sp + Default Change with +.sp + . matches newline yes REG_NEWLINE + newline matches [^a] yes REG_NEWLINE + $ matches \en at end no REG_NEWLINE + $ matches \en in middle no REG_NEWLINE + ^ matches \en in middle no REG_NEWLINE +.sp +This behaviour is not what happens when PCRE2 is called via its POSIX +API. By default, PCRE2's behaviour is the same as Perl's, except that there is +no equivalent for PCRE2_DOLLAR_ENDONLY in Perl. In both PCRE2 and Perl, there +is no way to stop newline from matching [^a]. +.P +Default POSIX newline handling can be obtained by setting PCRE2_DOTALL and +PCRE2_DOLLAR_ENDONLY when calling \fBpcre2_compile()\fP directly, but there is +no way to make PCRE2 behave exactly as for the REG_NEWLINE action. 
When using +the POSIX API, passing REG_NEWLINE to PCRE2's \fBpcre2_regcomp()\fP function +causes PCRE2_MULTILINE to be passed to \fBpcre2_compile()\fP, and REG_DOTALL +passes PCRE2_DOTALL. There is no way to pass PCRE2_DOLLAR_ENDONLY. +. +. +.SH "MATCHING A PATTERN" +.rs +.sp +The function \fBpcre2_regexec()\fP is called to match a compiled pattern +\fIpreg\fP against a given \fIstring\fP, which is by default terminated by a +zero byte (but see REG_STARTEND below), subject to the options in \fIeflags\fP. +These can be: +.sp + REG_NOTBOL +.sp +The PCRE2_NOTBOL option is set when calling the underlying PCRE2 matching +function. +.sp + REG_NOTEMPTY +.sp +The PCRE2_NOTEMPTY option is set when calling the underlying PCRE2 matching +function. Note that REG_NOTEMPTY is not part of the POSIX standard. However, +setting this option can give more POSIX-like behaviour in some situations. +.sp + REG_NOTEOL +.sp +The PCRE2_NOTEOL option is set when calling the underlying PCRE2 matching +function. +.sp + REG_STARTEND +.sp +When this option is set, the subject string starts at \fIstring\fP + +\fIpmatch[0].rm_so\fP and ends at \fIstring\fP + \fIpmatch[0].rm_eo\fP, which +should point to the first character beyond the string. There may be binary +zeros within the subject string, and indeed, using REG_STARTEND is the only +way to pass a subject string that contains a binary zero. +.P +Whatever the value of \fIpmatch[0].rm_so\fP, the offsets of the matched string +and any captured substrings are still given relative to the start of +\fIstring\fP itself. (Before PCRE2 release 10.30 these were given relative to +\fIstring\fP + \fIpmatch[0].rm_so\fP, but this differs from other +implementations.) +.P +This is a BSD extension, compatible with but not specified by IEEE Standard +1003.2 (POSIX.2), and should be used with caution in software intended to be +portable to other systems. 
Note that a non-zero \fIrm_so\fP does not imply +REG_NOTBOL; REG_STARTEND affects only the location and length of the string, +not how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL +are mutually exclusive; the error REG_INVARG is returned. +.P +If the pattern was compiled with the REG_NOSUB flag, no data about any matched +strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of +\fBpcre2_regexec()\fP are ignored (except possibly as input for REG_STARTEND). +.P +The value of \fInmatch\fP may be zero, and the value \fIpmatch\fP may be NULL +(unless REG_STARTEND is set); in both these cases no data about any matched +strings is returned. +.P +Otherwise, the portion of the string that was matched, and also any captured +substrings, are returned via the \fIpmatch\fP argument, which points to an +array of \fInmatch\fP structures of type \fIregmatch_t\fP, containing the +members \fIrm_so\fP and \fIrm_eo\fP. These contain the byte offset to the first +character of each substring and the offset to the first character after the end +of each substring, respectively. The 0th element of the vector relates to the +entire portion of \fIstring\fP that was matched; subsequent elements relate to +the capturing subpatterns of the regular expression. Unused entries in the +array have both structure members set to -1. +.P +\fIregmatch_t\fP as well as the \fIregoff_t\fP typedef it uses are defined in +\fBpcre2posix.h\fP and are not warranted to have the same size or layout as other +similarly named types from other libraries that provide POSIX-style matching. +.P +A successful match yields a zero return; various error codes are defined in the +header file, of which REG_NOMATCH is the "expected" failure code. +. +. +.SH "ERROR MESSAGES" +.rs +.sp +The \fBpcre2_regerror()\fP function maps a non-zero errorcode from either +\fBpcre2_regcomp()\fP or \fBpcre2_regexec()\fP to a printable message. 
If +\fIpreg\fP is not NULL, the error should have arisen from the use of that +structure. A message terminated by a binary zero is placed in \fIerrbuf\fP. If +the buffer is too short, only the first \fIerrbuf_size\fP - 1 characters of the +error message are used. The yield of the function is the size of buffer needed +to hold the whole message, including the terminating zero. This value is +greater than \fIerrbuf_size\fP if the message was truncated. +. +. +.SH MEMORY USAGE +.rs +.sp +Compiling a regular expression causes memory to be allocated and associated +with the \fIpreg\fP structure. The function \fBpcre2_regfree()\fP frees all +such memory, after which \fIpreg\fP may no longer be used as a compiled +expression. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 November 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2sample.3 b/3rd/pcre2/doc/pcre2sample.3 new file mode 100644 index 00000000..e9fecd1d --- /dev/null +++ b/3rd/pcre2/doc/pcre2sample.3 @@ -0,0 +1,99 @@ +.TH PCRE2SAMPLE 3 "14 November 2023" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 SAMPLE PROGRAM" +.rs +.sp +A simple, complete demonstration program to get you started with using PCRE2 is +supplied in the file \fIpcre2demo.c\fP in the \fBsrc\fP directory in the PCRE2 +distribution. A listing of this program is given in the +.\" HREF +\fBpcre2demo\fP +.\" +documentation. If you do not have a copy of the PCRE2 distribution, you can +save this listing to re-create the contents of \fIpcre2demo.c\fP. +.P +The demonstration program compiles the regular expression that is its +first argument, and matches it against the subject string in its second +argument. No PCRE2 options are set, and default character tables are used. 
If +matching succeeds, the program outputs the portion of the subject that matched, +together with the contents of any captured substrings. +.P +If the -g option is given on the command line, the program then goes on to +check for further matches of the same regular expression in the same subject +string. The logic is a little bit tricky because of the possibility of matching +an empty string. Comments in the code explain what is going on. +.P +The code in \fBpcre2demo.c\fP is an 8-bit program that uses the PCRE2 8-bit +library. It handles strings and characters that are stored in 8-bit code units. +By default, one character corresponds to one code unit, but if the pattern +starts with "(*UTF)", both it and the subject are treated as UTF-8 strings, +where characters may occupy multiple code units. +.P +If PCRE2 is installed in the standard include and library directories for your +operating system, you should be able to compile the demonstration program using +a command like this: +.sp + cc -o pcre2demo pcre2demo.c -lpcre2-8 +.sp +If PCRE2 is installed elsewhere, you may need to add additional options to the +command line. For example, on a Unix-like system that has PCRE2 installed in +\fI/usr/local\fP, you can compile the demonstration program using a command +like this: +.sp +.\" JOINSH + cc -o pcre2demo -I/usr/local/include pcre2demo.c \e + -L/usr/local/lib -lpcre2-8 +.sp +Once you have built the demonstration program, you can run simple tests like +this: +.sp + ./pcre2demo 'cat|dog' 'the cat sat on the mat' + ./pcre2demo -g 'cat|dog' 'the dog sat on the cat' +.sp +Note that there is a much more comprehensive test program, called +.\" HREF +\fBpcre2test\fP, +.\" +which supports many more facilities for testing regular expressions using all +three PCRE2 libraries (8-bit, 16-bit, and 32-bit, though not all three need be +installed). The +.\" HREF +\fBpcre2demo\fP +.\" +program is provided as a relatively simple coding example. 
+.P +If you try to run +.\" HREF +\fBpcre2demo\fP +.\" +when PCRE2 is not installed in the standard library directory, you may get an +error like this on some operating systems (e.g. Solaris): +.sp + ld.so.1: pcre2demo: fatal: libpcre2-8.so.0: open failed: No such file or directory +.sp +This is caused by the way shared library support works on those systems. You +need to add +.sp + -R/usr/local/lib +.sp +(for example) to the compile command to get round this problem. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 14 November 2023 +Copyright (c) 1997-2016 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2serialize.3 b/3rd/pcre2/doc/pcre2serialize.3 new file mode 100644 index 00000000..d52507e0 --- /dev/null +++ b/3rd/pcre2/doc/pcre2serialize.3 @@ -0,0 +1,198 @@ +.TH PCRE2SERIALIZE 3 "19 January 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS" +.rs +.sp +.nf +.B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," +.B " pcre2_general_context *\fIgcontext\fP);" +.sp +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," +.B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" +.sp +.B void pcre2_serialize_free(uint8_t *\fIbytes\fP); +.sp +.B int32_t pcre2_serialize_get_number_of_codes(const uint8_t *\fIbytes\fP); +.fi +.sp +If you are running an application that uses a large number of regular +expression patterns, it may be useful to store them in a precompiled form +instead of having to compile them every time the application is run. 
However, +if you are using the just-in-time optimization feature, it is not possible to +save and reload the JIT data, because it is position-dependent. The host on +which the patterns are reloaded must be running the same version of PCRE2, with +the same code unit width, and must also have the same endianness, pointer width +and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using +PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be +reloaded using the 8-bit library. +.P +Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET serialization. The serialized output is +really just a bytecode dump, which is why it can only be reloaded in the same +environment as the one that created it. Hence the restrictions mentioned above. +Applications that are not statically linked with a fixed version of PCRE2 must +be prepared to recompile patterns from their sources, in order to be immune to +PCRE2 upgrades. +. +. +.SH "SECURITY CONCERNS" +.rs +.sp +The facility for saving and restoring compiled patterns is intended for use +within individual applications. As such, the data supplied to +\fBpcre2_serialize_decode()\fP is expected to be trusted data, not data from +arbitrary external sources. There is only some simple consistency checking, not +complete validation of what is being re-loaded. Corrupted data may cause +undefined results. For example, if the length field of a pattern in the +serialized data is corrupted, the deserializing code may read beyond the end of +the byte stream that is passed to it. +. +. +.SH "SAVING COMPILED PATTERNS" +.rs +.sp +Before compiled patterns can be saved they must be serialized, which in PCRE2 +means converting the pattern to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. 
A single copy of the tables is included in the byte stream +(its size is 1088 bytes). For more details of character tables, see the +.\" HTML +.\" +section on locale support +.\" +in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +The function \fBpcre2_serialize_encode()\fP creates a serialized byte stream +from a list of compiled patterns. Its first two arguments specify the list, +being a pointer to a vector of pointers to compiled patterns, and the length of +the vector. The third and fourth arguments point to variables which are set to +point to the created byte stream and its length, respectively. The final +argument is a pointer to a general context, which can be used to specify custom +memory management functions. If this argument is NULL, \fBmalloc()\fP is used +to obtain memory for the byte stream. The yield of the function is the number +of serialized patterns, or one of the following negative error codes: +.sp + PCRE2_ERROR_BADDATA the number of patterns is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in one of the patterns + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_MIXEDTABLES the patterns do not all use the same tables + PCRE2_ERROR_NULL the 1st, 3rd, or 4th argument is NULL +.sp +PCRE2_ERROR_BADMAGIC means either that a pattern's code has been corrupted, or +that a slot in the vector does not point to a compiled pattern. +.P +Once a set of patterns has been serialized you can save the data in any +appropriate manner. Here is sample code that compiles two patterns and writes +them to a file. It assumes that the variable \fIfd\fP refers to a file that is +open for output. The error checking that should be present in a real +application has been omitted for simplicity. 
+.sp + int errorcode; + uint8_t *bytes; + PCRE2_SIZE erroroffset; + PCRE2_SIZE bytescount; + pcre2_code *list_of_codes[2]; + list_of_codes[0] = pcre2_compile("first pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + list_of_codes[1] = pcre2_compile("second pattern", + PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL); + errorcode = pcre2_serialize_encode(list_of_codes, 2, &bytes, + &bytescount, NULL); + errorcode = fwrite(bytes, 1, bytescount, fd); +.sp +Note that the serialized data is binary data that may contain any of the 256 +possible byte values. On systems that make a distinction between binary and +non-binary data, be sure that the file is opened for binary output. +.P +Serializing a set of patterns leaves the original data untouched, so they can +still be used for matching. Their memory must eventually be freed in the usual +way by calling \fBpcre2_code_free()\fP. When you have finished with the byte +stream, it too must be freed by calling \fBpcre2_serialize_free()\fP. If this +function is called with a NULL argument, it returns immediately without doing +anything. +. +. +.SH "RE-USING PRECOMPILED PATTERNS" +.rs +.sp +In order to re-use a set of saved patterns you must first make the serialized +byte stream available in main memory (for example, by reading from a file). The +management of this memory block is up to the application. You can use the +\fBpcre2_serialize_get_number_of_codes()\fP function to find out how many +compiled patterns are in the serialized data without actually decoding the +patterns: +.sp + uint8_t *bytes = <serialized data>; + int32_t number_of_codes = pcre2_serialize_get_number_of_codes(bytes); +.sp +The \fBpcre2_serialize_decode()\fP function reads a byte stream and recreates +the compiled patterns in new memory blocks, setting pointers to them in a +vector. The first two arguments are a pointer to a suitable vector and its +length, and the third argument points to a byte stream. 
The final argument is a +pointer to a general context, which can be used to specify custom memory +management functions for the decoded patterns. If this argument is NULL, +\fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte +stream is no longer needed and can be discarded. +.sp + pcre2_code *list_of_codes[2]; + uint8_t *bytes = <serialized data>; + int32_t number_of_codes = + pcre2_serialize_decode(list_of_codes, 2, bytes, NULL); +.sp +If the vector is not large enough for all the patterns in the byte stream, it +is filled with those that fit, and the remainder are ignored. The yield of the +function is the number of decoded patterns, or one of the following negative +error codes: +.sp + PCRE2_ERROR_BADDATA second argument is zero or less + PCRE2_ERROR_BADMAGIC mismatch of id bytes in the data + PCRE2_ERROR_BADMODE mismatch of code unit size or PCRE2 version + PCRE2_ERROR_BADSERIALIZEDDATA other sanity check failure + PCRE2_ERROR_NOMEMORY memory allocation failed + PCRE2_ERROR_NULL first or third argument is NULL +.sp +PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled +on a system with different endianness. +.P +Decoded patterns can be used for matching in the usual way, and must be freed +by calling \fBpcre2_code_free()\fP. However, be aware that there is a potential +race issue if you are using multiple patterns that were decoded from a single +byte stream in a multithreaded application. A single copy of the character +tables is used by all the decoded patterns and a reference count is used to +arrange for its memory to be automatically freed when the last pattern is +freed, but there is no locking on this reference count. Therefore, if you want +to call \fBpcre2_code_free()\fP for these patterns in different threads, you +must arrange your own locking, and ensure that \fBpcre2_code_free()\fP cannot +be called by two threads at the same time. 
+.P +If a pattern was processed by \fBpcre2_jit_compile()\fP before being +serialized, the JIT data is discarded and so is no longer available after a +save/restore cycle. You can, however, process a restored pattern with +\fBpcre2_jit_compile()\fP if you wish. +. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 19 January 2024 +Copyright (c) 1997-2018 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2syntax.3 b/3rd/pcre2/doc/pcre2syntax.3 new file mode 100644 index 00000000..28305bdf --- /dev/null +++ b/3rd/pcre2/doc/pcre2syntax.3 @@ -0,0 +1,736 @@ +.TH PCRE2SYNTAX 3 "27 November 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" +.rs +.sp +The full syntax and semantics of the regular expression patterns that are +supported by PCRE2 are described in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. This document contains a quick-reference summary of the pattern +syntax followed by the syntax of replacement strings in substitution function. +The full description of the latter is in the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +. +.SH "QUOTING" +.rs +.sp + \ex where x is non-alphanumeric is a literal x + \eQ...\eE treat enclosed characters as literal +.sp +Note that white space inside \eQ...\eE is always treated as literal, even if +PCRE2_EXTENDED is set, causing most other white space to be ignored. Note also +that PCRE2's handling of \eQ...\eE has some differences from Perl's. See the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation for details. +. +. +.SH "BRACED ITEMS" +.rs +.sp +With one exception, wherever brace characters { and } are required to enclose +data for constructions such as \eg{2} or \ek{name}, space and/or horizontal tab +characters that follow { or precede } are allowed and are ignored. 
In the case +of quantifiers, they may also appear before or after the comma. The exception +is \eu{...} which is not Perl-compatible and is recognized only when +PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript compatibility feature, and +follows ECMAScript's behaviour. +. +. +.SH "ESCAPED CHARACTERS" +.rs +.sp +This table applies to ASCII and Unicode environments. An unrecognized escape +sequence causes an error. +.sp + \ea alarm, that is, the BEL character (hex 07) + \ecx "control-x", where x is a non-control ASCII character + \ee escape (hex 1B) + \ef form feed (hex 0C) + \en newline (hex 0A) + \er carriage return (hex 0D) + \et tab (hex 09) + \e0dd character with octal code 0dd + \eddd character with octal code ddd, or backreference + \eo{ddd..} character with octal code ddd.. + \eN{U+hh..} character with Unicode code point hh.. (Unicode mode only) + \exhh character with hex code hh + \ex{hh..} character with hex code hh.. +.sp +\eN{U+hh..} is synonymous with \ex{hh..} but is not supported in environments +that use EBCDIC code (mainly IBM mainframes). Note that \eN not followed by an +opening curly bracket has a different meaning (see below). +.P +If PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX is set ("ALT_BSUX mode"), the +following are also recognized: +.sp + \eU the character "U" + \euhhhh character with hex code hhhh + \eu{hh..} character with hex code hh.. but only for EXTRA_ALT_BSUX +.sp +When \ex is not followed by {, one or two hexadecimal digits are read, +but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be +recognized as a hexadecimal escape; otherwise it matches a literal "x". +Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits +or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it +matches a literal "u". +.P +Note that \e0dd is always an octal code. 
The treatment of backslash followed by +a non-zero digit is complicated; for details see the section +.\" HTML +.\" +"Non-printing characters" +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation, where details of escape processing in EBCDIC environments are +also given. +. +. +.SH "CHARACTER TYPES" +.rs +.sp + . any character except newline; + in dotall mode, any character whatsoever + \eC one code unit, even in UTF mode (best avoided) + \ed a decimal digit + \eD a character that is not a decimal digit + \eh a horizontal white space character + \eH a character that is not a horizontal white space character + \eN a character that is not a newline + \ep{\fIxx\fP} a character with the \fIxx\fP property + \eP{\fIxx\fP} a character without the \fIxx\fP property + \eR a newline sequence + \es a white space character + \eS a character that is not a white space character + \ev a vertical white space character + \eV a character that is not a vertical white space character + \ew a "word" character + \eW a "non-word" character + \eX a Unicode extended grapheme cluster +.sp +\eC is dangerous because it may leave the current matching point in the middle +of a UTF-8 or UTF-16 character. The application can lock out the use of \eC by +setting the PCRE2_NEVER_BACKSLASH_C option. It is also possible to build PCRE2 +with the use of \eC permanently disabled. +.P +By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode +or in the 16-bit and 32-bit libraries. However, if locale-specific matching is +happening, \es and \ew may also match characters with code points in the range +128-255. If the PCRE2_UCP option is set, the behaviour of these escape +sequences is changed to use Unicode properties and they match many more +characters, but there are some option settings that can restrict individual +sequences to matching only ASCII characters. 
+.P +Property descriptions in \ep and \eP are matched caselessly; hyphens, +underscores, and ASCII white space characters are ignored, in accordance with +Unicode's "loose matching" rules. For example, \ep{Bidi_Class=al} is the same +as \ep{ bidi class = AL }. +. +. +.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP" +.rs +.sp + C Other + Cc Control + Cf Format + Cn Unassigned + Co Private use + Cs Surrogate +.sp + L Letter + Lc Cased letter, the union of Ll, Lu, and Lt + L& Synonym of Lc + Ll Lower case letter + Lm Modifier letter + Lo Other letter + Lt Title case letter + Lu Upper case letter +.sp + M Mark + Mc Spacing mark + Me Enclosing mark + Mn Non-spacing mark +.sp + N Number + Nd Decimal number + Nl Letter number + No Other number +.sp + P Punctuation + Pc Connector punctuation + Pd Dash punctuation + Pe Close punctuation + Pf Final punctuation + Pi Initial punctuation + Po Other punctuation + Ps Open punctuation +.sp + S Symbol + Sc Currency symbol + Sk Modifier symbol + Sm Mathematical symbol + So Other symbol +.sp + Z Separator + Zl Line separator + Zp Paragraph separator + Zs Space separator +.sp +From release 10.45, when caseless matching is set, Ll, Lu, and Lt are all +equivalent to Lc. +. +. +.SH "PCRE2 SPECIAL CATEGORY PROPERTIES FOR \ep and \eP" +.rs +.sp + Xan Alphanumeric: union of properties L and N + Xps POSIX space: property Z or tab, NL, VT, FF, CR + Xsp Perl space: property Z or tab, NL, VT, FF, CR + Xuc Universally-named character: one that can be + represented by a Universal Character Name + Xwd Perl word: property Xan or underscore +.sp +Perl and POSIX space are now the same. Perl added VT to its space character set +at release 5.18. +. +. +.SH "BINARY PROPERTIES FOR \ep AND \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +. 
+. +. +.SH "SCRIPT MATCHING WITH \ep AND \eP" +.rs +.sp +Many script names and their 4-letter abbreviations are recognized in +\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of +course). You can obtain a list of these scripts by running this command: +.sp + pcre2test -LS +. +. +. +.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP" +.rs +.sp + \ep{Bidi_Class:} matches a character with the given class + \ep{BC:} matches a character with the given class +.sp +The recognized classes are: +.sp + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS white space +. +. +.SH "CHARACTER CLASSES" +.rs +.sp + [...] positive character class + [^...] negative character class + [x-y] range (can be used for hex characters) + [[:xxx:]] positive POSIX named set + [[:^xxx:]] negative POSIX named set +.sp + alnum alphanumeric + alpha alphabetic + ascii 0-127 + blank space or tab + cntrl control character + digit decimal digit + graph printing, excluding space + lower lower case letter + print printing, including space + punct printing, excluding alphanumeric + space white space + upper upper case letter + word same as \ew + xdigit hexadecimal digit +.sp +In PCRE2, POSIX character set names recognize only ASCII characters by default, +but some of them use Unicode properties if PCRE2_UCP is set. You can use +\eQ...\eE inside a character class. 
+.P +When PCRE2_ALT_EXTENDED_CLASS is set, UTS#18 extended character classes may be +used, allowing nested character classes, combined using set operators. +.sp + [x&&[^y]] UTS#18 extended character class +.sp + x||y set union (OR) + x&&y set intersection (AND) + x--y set difference (AND NOT) + x~~y set symmetric difference (XOR) +.sp +. +. +.SH "PERL EXTENDED CHARACTER CLASSES" +.rs +.sp + (?[...]) Perl extended character class + (?[\ep{Thai} & \ep{Nd}]) operators; whitespace ignored + (?[(x - y) & z]) parentheses for grouping +.sp + (?[ [^3] & \ep{Nd} ]) [...] is a nested ordinary class + (?[ [:alpha:] - [z] ]) POSIX set is allowed outside [...] + (?[ \ed - [3] ]) backslash-escaped set is allowed outside [...] + (?[ !\en & [:ascii:] ]) backslash-escaped character is allowed outside [...] + all other characters or ranges must be enclosed in [...] +.sp + x|y, x+y set union (OR) + x&y set intersection (AND) + x-y set difference (AND NOT) + x^y set symmetric difference (XOR) + !x set complement (NOT) +.sp +Inside a Perl extended character class, [...] switches mode to be interpreted +as an ordinary character class. Outside of a nested [...], the only items +permitted are backslash-escapes, POSIX sets, operators, and parentheses. Inside +a nested ordinary class, ^ has its usual meaning (inverts the class when used +as the first character); outside of a nested class, ^ is the XOR operator. +. +. +.SH "QUANTIFIERS" +.rs +.sp + ? 0 or 1, greedy + ?+ 0 or 1, possessive + ?? 0 or 1, lazy + * 0 or more, greedy + *+ 0 or more, possessive + *? 0 or more, lazy + + 1 or more, greedy + ++ 1 or more, possessive + +? 1 or more, lazy + {n} exactly n + {n,m} at least n, no more than m, greedy + {n,m}+ at least n, no more than m, possessive + {n,m}? at least n, no more than m, lazy + {n,} n or more, greedy + {n,}+ n or more, possessive + {n,}? n or more, lazy + {,m} zero up to m, greedy + {,m}+ zero up to m, possessive + {,m}? zero up to m, lazy +. +. 
+.SH "ANCHORS AND SIMPLE ASSERTIONS" +.rs +.sp + \eb word boundary + \eB not a word boundary + ^ start of subject + also after an internal newline in multiline mode + (after any newline if PCRE2_ALT_CIRCUMFLEX is set) + \eA start of subject + $ end of subject + also before newline at end of subject + also before internal newline in multiline mode + \eZ end of subject + also before newline at end of subject + \ez end of subject + \eG first matching position in subject +. +. +.SH "REPORTED MATCH POINT SETTING" +.rs +.sp + \eK set reported start of match +.sp +From release 10.38 \eK is not permitted by default in lookaround assertions, +for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK +option is set, the previous behaviour is re-enabled. When this option is set, +\eK is honoured in positive assertions, but ignored in negative ones. +. +. +.SH "ALTERNATION" +.rs +.sp + expr|expr|expr... +. +. +.SH "CAPTURING" +.rs +.sp + (...) capture group + (?<name>...) named capture group (Perl) + (?'name'...) named capture group (Perl) + (?P<name>...) named capture group (Python) + (?:...) non-capture group + (?|...) non-capture group; reset group numbers for + capture groups in each alternative +.sp +In non-UTF modes, names may contain underscores and ASCII letters and digits; +in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In +both cases, a name must not start with a digit. +. +. +.SH "ATOMIC GROUPS" +.rs +.sp + (?>...) atomic non-capture group + (*atomic:...) atomic non-capture group +. +. +.SH "COMMENT" +.rs +.sp + (?#....) comment (not nestable) +. +. +.SH "OPTION SETTING" +.rs +Changes of these options within a group are automatically cancelled at the end +of the group. 
+.sp + (?a) all ASCII options + (?aD) restrict \ed to ASCII in UCP mode + (?aS) restrict \es to ASCII in UCP mode + (?aW) restrict \ew to ASCII in UCP mode + (?aP) restrict all POSIX classes to ASCII in UCP mode + (?aT) restrict POSIX digit classes to ASCII in UCP mode + (?i) caseless + (?J) allow duplicate named groups + (?m) multiline + (?n) no auto capture + (?r) restrict caseless to either ASCII or non-ASCII + (?s) single line (dotall) + (?U) default ungreedy (lazy) + (?x) ignore white space except in classes or \eQ...\eE + (?xx) as (?x) but also ignore space and tab in classes + (?-...) unset the given option(s) + (?^) unset imnrsx options +.sp +(?aP) implies (?aT) as well, though this has no additional effect. However, it +means that (?-aP) also implies (?-aT) and disables all ASCII restrictions for +POSIX classes. +.P +Unsetting x or xx unsets both. Several options may be set at once, and a +mixture of setting and unsetting such as (?i-x) is allowed, but there may be +only one hyphen. Setting (but no unsetting) is allowed after (?^ for example +(?^in). An option setting may appear at the start of a non-capture group, for +example (?i:...). +.P +The following are recognized only at the very start of a pattern or after one +of the newline or \eR sequences or options with similar syntax. More than one +of them may appear. For the first three, d is a decimal number. 
+.sp + (*LIMIT_DEPTH=d) set the backtracking limit to d + (*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes + (*LIMIT_MATCH=d) set the match limit to d + (*CASELESS_RESTRICT) set PCRE2_EXTRA_CASELESS_RESTRICT when matching + (*NOTEMPTY) set PCRE2_NOTEMPTY when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) + (*NO_DOTSTAR_ANCHOR) no .* anchoring (PCRE2_NO_DOTSTAR_ANCHOR) + (*NO_JIT) disable JIT optimization + (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) + (*TURKISH_CASING) set PCRE2_EXTRA_TURKISH_CASING when matching + (*UTF) set appropriate UTF mode for the library in use + (*UCP) set PCRE2_UCP (use Unicode properties for \ed etc) +.sp +Note that LIMIT_DEPTH, LIMIT_HEAP, and LIMIT_MATCH can only reduce the value of +the limits set by the caller of \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP, +not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The +application can lock out the use of (*UTF) and (*UCP) by setting the +PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time. +. +. +.SH "NEWLINE CONVENTION" +.rs +.sp +These are recognized only at the very start of the pattern or after option +settings with a similar syntax. +.sp + (*CR) carriage return only + (*LF) linefeed only + (*CRLF) carriage return followed by linefeed + (*ANYCRLF) all three of the above + (*ANY) any Unicode newline sequence + (*NUL) the NUL character (binary zero) +. +. +.SH "WHAT \eR MATCHES" +.rs +.sp +These are recognized only at the very start of the pattern or after option +setting with a similar syntax. +.sp + (*BSR_ANYCRLF) CR, LF, or CRLF + (*BSR_UNICODE) any Unicode newline sequence +. +. +.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS" +.rs +.sp + (?=...) ) + (*pla:...) ) positive lookahead + (*positive_lookahead:...) ) +.sp + (?!...) ) + (*nla:...) ) negative lookahead + (*negative_lookahead:...) ) +.sp + (?<=...) 
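) + (*plb:...) ) positive lookbehind + (*positive_lookbehind:...) ) +.sp + (?<!...) ) + (*nlb:...) ) negative lookbehind + (*negative_lookbehind:...) ) +.sp +Each top-level branch of a lookbehind must have a limit for the number of +characters it matches. If any branch can match a variable number of characters, +the maximum for each branch is limited to a value set by the caller of +\fBpcre2_compile()\fP or defaulted. The default is set when PCRE2 is built +(ultimate default 255). If every branch matches a fixed number of characters, +the limit for each branch is 65535 characters. +. +. +.SH "NON-ATOMIC LOOKAROUND ASSERTIONS" +.rs +.sp +These assertions are specific to PCRE2 and are not Perl-compatible. +.sp + (?*...) ) + (*napla:...) ) synonyms + (*non_atomic_positive_lookahead:...) ) +.sp + (?<*...) ) + (*naplb:...) ) synonyms + (*non_atomic_positive_lookbehind:...) ) +. +. +.SH "SUBSTRING SCAN ASSERTION" +.rs +.sp +This feature is not Perl-compatible. +.sp + (*scan_substring:(grouplist)...) scan captured substring + (*scs:(grouplist)...) scan captured substring +.sp +The comma-separated list may identify groups in any of the following ways: +.sp + n absolute reference + +n relative reference + -n relative reference + <name> name + 'name' name +.sp +. +. +.SH "SCRIPT RUNS" +.rs +.sp + (*script_run:...) ) script run, can be backtracked into + (*sr:...) ) +.sp + (*atomic_script_run:...) ) atomic script run + (*asr:...) ) +. +. +.SH "BACKREFERENCES" +.rs +.sp + \en reference by number (can be ambiguous) + \egn reference by number + \eg{n} reference by number + \eg+n relative reference by number (PCRE2 extension) + \eg-n relative reference by number + \eg{+n} relative reference by number (PCRE2 extension) + \eg{-n} relative reference by number + \ek<name> reference by name (Perl) + \ek'name' reference by name (Perl) + \eg{name} reference by name (Perl) + \ek{name} reference by name (.NET) + (?P=name) reference by name (Python) +. +. +.SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)" +.rs +.sp + (?R) recurse whole pattern + (?n) call subroutine by absolute number + (?+n) call subroutine by relative number + (?-n) call subroutine by relative number + (?&name) call subroutine by name (Perl) + (?P>name) call subroutine by name (Python) + \eg<name> call subroutine by name (Oniguruma) + \eg'name' call subroutine by name (Oniguruma) + \eg<n> call subroutine by absolute number (Oniguruma) + \eg'n' call subroutine by absolute number (Oniguruma) + \eg<+n> call subroutine by relative number (PCRE2 extension) + \eg'+n' call subroutine by relative number (PCRE2 extension) + \eg<-n> call subroutine by relative number (PCRE2 extension) + \eg'-n' call subroutine by relative number (PCRE2 extension) +. +. 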
+.SH "CONDITIONAL PATTERNS" +.rs +.sp + (?(condition)yes-pattern) + (?(condition)yes-pattern|no-pattern) +.sp + (?(n) absolute reference condition + (?(+n) relative reference condition (PCRE2 extension) + (?(-n) relative reference condition (PCRE2 extension) + (?(<name>) named reference condition (Perl) + (?('name') named reference condition (Perl) + (?(name) named reference condition (PCRE2, deprecated) + (?(R) overall recursion condition + (?(Rn) specific numbered group recursion condition + (?(R&name) specific named group recursion condition + (?(DEFINE) define groups for reference + (?(VERSION[>]=n.m) test PCRE2 version + (?(assert) assertion condition +.sp +Note the ambiguity of (?(R) and (?(Rn) which might be named reference +conditions or recursion tests. Such a condition is interpreted as a reference +condition if the relevant named group exists. +. +. +.SH "BACKTRACKING CONTROL" +.rs +.sp +All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the +name is mandatory, for the others it is optional. (*SKIP) changes its behaviour +if :NAME is present. The others just set a name for passing back to the caller, +but this is not a name that (*SKIP) can see. The following act immediately they +are reached: +.sp + (*ACCEPT) force successful match + (*FAIL) force backtrack; synonym (*F) + (*MARK:NAME) set name to be passed back; synonym (*:NAME) +.sp +The following act only when a subsequent match failure causes a backtrack to +reach them. They all force a match failure, but they differ in what happens +afterwards. Those that advance the start-of-match point do so only if the +pattern is not anchored. 
+.sp + (*COMMIT) overall failure, no advance of starting point + (*PRUNE) advance to next starting character + (*SKIP) advance to current matching position + (*SKIP:NAME) advance to position corresponding to an earlier + (*MARK:NAME); if not found, the (*SKIP) is ignored + (*THEN) local failure, backtrack to next alternation +.sp +The effect of one of these verbs in a group called as a subroutine is confined +to the subroutine call. +. +. +.SH "CALLOUTS" +.rs +.sp + (?C) callout (assumed number 0) + (?Cn) callout with numerical data n + (?C"text") callout with string data +.sp +The allowed string delimiters are ` ' " ^ % # $ (which are the same for the +start and the end), and the starting delimiter { matched with the ending +delimiter }. To encode the ending delimiter within the string, double it. +. +. +.SH "REPLACEMENT STRINGS" +.rs +.sp +If the PCRE2_SUBSTITUTE_LITERAL option is set, a replacement string for +\fBpcre2_substitute()\fP is not interpreted. Otherwise, by default, the only +special character is the dollar character in one of the following forms: +.sp + $$ insert a dollar character + $n or ${n} insert the contents of group \fIn\fP + $<name> insert the contents of named group + $0 or $& insert the entire matched substring + $` insert the substring that precedes the match + $' insert the substring that follows the match + $_ insert the entire input string + $*MARK or ${*MARK} insert a control verb name +.sp +For ${n}, n can be a name or a number. If PCRE2_SUBSTITUTE_EXTENDED is set, +there is additional interpretation: +.P +1. Backslash is an escape character, and the forms described in "ESCAPED +CHARACTERS" above are recognized. 
Also: +.sp + \eQ...\eE can be used to suppress interpretation + \el force the next character to lower case + \eu force the next character to upper case + \eL force subsequent characters to lower case + \eU force subsequent characters to upper case + \eu\eL force next character to upper case, then all lower + \el\eU force next character to lower case, then all upper + \eE end \eL or \eU case forcing + \eb backspace character (note: as in character class in pattern) + \ev vertical tab character (note: not the same as in a pattern) +.sp +2. The Python form \eg<n>, where the angle brackets are part of the syntax and +\fIn\fP is either a group name or a number, is recognized as an alternative way +of inserting the contents of a group, for example \eg<3>. +.P +3. Capture substitution supports the following additional forms: +.sp + ${n:-string} default for unset group + ${n:+string1:string2} values for set/unset group +.sp +The substitution strings themselves are expanded. Backslash can be used to +escape colons and closing curly brackets. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2pattern\fP(3), \fBpcre2api\fP(3), \fBpcre2callout\fP(3), +\fBpcre2matching\fP(3), \fBpcre2\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 November 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2test.1 b/3rd/pcre2/doc/pcre2test.1 new file mode 100644 index 00000000..0a3ee21f --- /dev/null +++ b/3rd/pcre2/doc/pcre2test.1 @@ -0,0 +1,2246 @@ +.TH PCRE2TEST 1 "26 December 2024" "PCRE2 10.45" +.SH NAME +pcre2test - a program for testing Perl-compatible regular expressions. +.SH SYNOPSIS +.rs +.sp +.B pcre2test "[options] [input file [output file]]" +.sp +\fBpcre2test\fP is a test program for the PCRE2 regular expression libraries, +but it can also be used for experimenting with regular expressions. 
This +document describes the features of the test program; for details of the regular +expressions themselves, see the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. For details of the PCRE2 library function calls and their +options, see the +.\" HREF +\fBpcre2api\fP +.\" +documentation. +.P +The input for \fBpcre2test\fP is a sequence of regular expression patterns and +subject strings to be matched. There are also command lines for setting +defaults and controlling some special actions. The output shows the result of +each match attempt. Modifiers on external or internal command lines, the +patterns, and the subject lines specify PCRE2 function options, control how the +subject is processed, and what output is produced. +.P +There are many obscure modifiers, some of which are specifically designed for +use in conjunction with the test script and data files that are distributed as +part of PCRE2. All the modifiers are documented here, some without much +justification, but many of them are unlikely to be of use except when testing +the libraries. +. +. +.SH "PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES" +.rs +.sp +Different versions of the PCRE2 library can be built to support character +strings that are encoded in 8-bit, 16-bit, or 32-bit code units. One, two, or +all three of these libraries may be simultaneously installed. The +\fBpcre2test\fP program can be used to test all the libraries. However, its own +input and output are always in 8-bit format. When testing the 16-bit or 32-bit +libraries, patterns and subject strings are converted to 16-bit or 32-bit +format before being passed to the library functions. Results are converted back +to 8-bit code units for output. +.P +In the rest of this document, the names of library functions and structures +are given in generic form, for example, \fBpcre2_compile()\fP. The actual +names used in the libraries have a suffix _8, _16, or _32, as appropriate. +. +. 
+.\" HTML +.SH "INPUT ENCODING" +.rs +.sp +Input to \fBpcre2test\fP is processed line by line, either by calling the C +library's \fBfgets()\fP function, or via the \fBlibreadline\fP or \fBlibedit\fP +library. In some Windows environments character 26 (hex 1A) causes an immediate +end of file, and no further data is read, so this character should be avoided +unless you really want that action. +.P +The input is processed using C's string functions, so must not contain binary +zeros, even though in Unix-like environments, \fBfgets()\fP treats any bytes +other than newline as data characters. An error is generated if a binary zero +is encountered. By default subject lines are processed for backslash escapes, +which makes it possible to include any data value in strings that are passed to +the library for matching. For patterns, there is a facility for specifying some +or all of the 8-bit input characters as hexadecimal pairs, which makes it +possible to include binary zeros. +. +. +.SS "Input for the 16-bit and 32-bit libraries" +.rs +.sp +When testing the 16-bit or 32-bit libraries, there is a need to be able to +generate character code points greater than 255 in the strings that are passed +to the library. For subject lines and some patterns, backslash escapes can be +used. In addition, when the \fButf\fP modifier (see +.\" HTML +.\" +"Setting compilation options" +.\" +below) is set, the pattern and any following subject lines are interpreted as +UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate. +.P +For non-UTF testing of wide characters, the \fButf8_input\fP modifier can be +used. This is mutually exclusive with \fButf\fP, and is allowed only in 16-bit +or 32-bit mode. It causes the pattern and following subject lines to be treated +as UTF-8 according to the original definition (RFC 2279), which allows for +character values up to 0x7fffffff. 
Each character is placed in one 16-bit or +32-bit code unit (in the 16-bit case, values greater than 0xffff cause an error +to occur). +.P +UTF-8 (in its original definition) is not capable of encoding values greater +than 0x7fffffff, but such values can be handled by the 32-bit library. When +testing this library in non-UTF mode with \fButf8_input\fP set, if any +character is preceded by the byte 0xff (which is an invalid byte in UTF-8) +0x80000000 is added to the character's value. For subject strings, using an +escape sequence is preferable. +. +. +.SH "COMMAND LINE OPTIONS" +.rs +.TP 10 +\fB-8\fP +If the 8-bit library has been built, this option causes it to be used (this is +the default). If the 8-bit library has not been built, this option causes an +error. +.TP 10 +\fB-16\fP +If the 16-bit library has been built, this option causes it to be used. If the +8-bit library has not been built, this is the default. If the 16-bit library +has not been built, this option causes an error. +.TP 10 +\fB-32\fP +If the 32-bit library has been built, this option causes it to be used. If no +other library has been built, this is the default. If the 32-bit library has +not been built, this option causes an error. +.TP 10 +\fB-ac\fP +Behave as if each pattern has the \fBauto_callout\fP modifier, that is, insert +automatic callouts into every pattern that is compiled. +.TP 10 +\fB-AC\fP +As for \fB-ac\fP, but in addition behave as if each subject line has the +\fBcallout_extra\fP modifier, that is, show additional information from +callouts. +.TP 10 +\fB-b\fP +Behave as if each pattern has the \fBfullbincode\fP modifier; the full +internal binary form of the pattern is output after compilation. +.TP 10 +\fB-C\fP +Output the version number of the PCRE2 library, and all available information +about the optional features that are included, and then exit with zero exit +code. All other options are ignored. If both -C and -LM are present, whichever +is first is recognized. 
+.TP 10 +\fB-C\fP \fIoption\fP +Output information about a specific build-time option, then exit. This +functionality is intended for use in scripts such as \fBRunTest\fP. The +following options output the value and set the exit code as indicated: +.sp + ebcdic-nl the code for LF (= NL) in an EBCDIC environment: + either 0x15 or 0x25 + 0 if used in an ASCII/Unicode environment + exit code is always 0 + linksize the configured internal link size (2, 3, or 4) + exit code is set to the link size + newline the default newline setting: + CR, LF, CRLF, ANYCRLF, ANY, or NUL + exit code is always 0 + bsr the default setting for what \eR matches: + ANYCRLF or ANY + exit code is always 0 +.sp +The following options output 1 for true or 0 for false, and set the exit code +to the same value: +.sp + backslash-C \eC is supported (not locked out) + ebcdic compiled for an EBCDIC environment + jit just-in-time support is available + pcre2-16 the 16-bit library was built + pcre2-32 the 32-bit library was built + pcre2-8 the 8-bit library was built + unicode Unicode support is available +.sp +Note that the availability of JIT support in the library does not guarantee +that it can actually be used because in some environments it is unable to +allocate executable memory. The option "jitusable" gives more detailed +information. It returns one of the following values: +.sp + 0 JIT is available and usable + 1 JIT is available but cannot allocate executable memory + 2 JIT is not available + 3 Unexpected return from test call to \fBpcre2_jit_compile()\fP +.sp +If an unknown option is given, an error message is output; the exit code is 0. +.TP 10 +\fB-d\fP +Behave as if each pattern has the \fBdebug\fP modifier; the internal +form and information about the compiled pattern is output after compilation; +\fB-d\fP is equivalent to \fB-b -i\fP. 
+.TP 10 +\fB-dfa\fP +Behave as if each subject line has the \fBdfa\fP modifier; matching is done +using the \fBpcre2_dfa_match()\fP function instead of the default +\fBpcre2_match()\fP. +.TP 10 +\fB-error\fP \fInumber[,number,...]\fP +Call \fBpcre2_get_error_message()\fP for each of the error numbers in the +comma-separated list, display the resulting messages on the standard output, +then exit with zero exit code. The numbers may be positive or negative. This is +a convenience facility for PCRE2 maintainers. +.TP 10 +\fB-help\fP +Output a brief summary of these options and then exit. +.TP 10 +\fB-i\fP +Behave as if each pattern has the \fBinfo\fP modifier; information about the +compiled pattern is given after compilation. +.TP 10 +\fB-jit\fP +Behave as if each pattern line has the \fBjit\fP modifier; after successful +compilation, each pattern is passed to the just-in-time compiler, if available. +.TP 10 +\fB-jitfast\fP +Behave as if each pattern line has the \fBjitfast\fP modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and each subject line is passed directly to the JIT matcher via its +"fast path". +.TP 10 +\fB-jitverify\fP +Behave as if each pattern line has the \fBjitverify\fP modifier; after +successful compilation, each pattern is passed to the just-in-time compiler, if +available, and the use of JIT for matching is verified. +.TP 10 +\fB-LM\fP +List modifiers: write a list of available pattern and subject modifiers to the +standard output, then exit with zero exit code. All other options are ignored. +If both -C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LP\fP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. 
+.TP 10 +\fB-LS\fP +List scripts: write a list of recognized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-pattern\fP \fImodifier-list\fP +Behave as if each pattern line contains the given modifiers. +.TP 10 +\fB-q\fP +Do not output the version number of \fBpcre2test\fP at the start of execution. +.TP 10 +\fB-S\fP \fIsize\fP +On Unix-like systems, set the size of the run-time stack to \fIsize\fP +mebibytes (units of 1024*1024 bytes). +.TP 10 +\fB-subject\fP \fImodifier-list\fP +Behave as if each subject line contains the given modifiers. +.TP 10 +\fB-t\fP +Run each compile and match many times with a timer, and output the resulting +times per compile or match. When JIT is used, separate times are given for the +initial compile and the JIT compile. You can control the number of iterations +that are used for timing by following \fB-t\fP with a number (as a separate +item on the command line). For example, "-t 1000" iterates 1000 times. The +default is to iterate 500,000 times. +.TP 10 +\fB-tm\fP +This is like \fB-t\fP except that it times only the matching phase, not the +compile phase. +.TP 10 +\fB-T\fP \fB-TM\fP +These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run, +the total times for all compiles and matches are output. +.TP 10 +\fB-version\fP +Output the PCRE2 version number and then exit. +. +. +.SH "DESCRIPTION" +.rs +.sp +If \fBpcre2test\fP is given two filename arguments, it reads from the first and +writes to the second. If the first name is "-", input is taken from the +standard input. If \fBpcre2test\fP is given only one argument, it reads from +that file and writes to stdout. Otherwise, it reads from stdin and writes to +stdout. +.P +When \fBpcre2test\fP is built, a configuration option can specify that it +should be linked with the \fBlibreadline\fP or \fBlibedit\fP library. 
When this +is done, if the input is from a terminal, it is read using the \fBreadline()\fP +function. This provides line-editing and history facilities. The output from +the \fB-help\fP option states whether or not \fBreadline()\fP will be used. +.P +The program handles any number of tests, each of which consists of a set of +input lines. Each set starts with a regular expression pattern, followed by any +number of subject lines to be matched against that pattern. In between sets of +test data, command lines that begin with # may appear. This file format, with +some restrictions, can also be processed by the \fBperltest.sh\fP script that +is distributed with PCRE2 as a means of checking that the behaviour of PCRE2 +and Perl is the same. For a specification of \fBperltest.sh\fP, see the +comments near its beginning. See also the #perltest command below. +.P +When the input is a terminal, \fBpcre2test\fP prompts for each line of input, +using "re>" to prompt for regular expression patterns, and "data>" to prompt +for subject lines. Command lines starting with # can be entered only in +response to the "re>" prompt. +.P +Each subject line is matched separately and independently. If you want to do +multi-line matches, you have to use the \en escape sequence (or \er or \er\en, +etc., depending on the newline setting) in a single line of input to encode the +newline sequences. There is no limit on the length of subject lines; the input +buffer is automatically extended if it is too small. There are replication +features that make it possible to generate long repetitive pattern or subject +lines without having to supply them explicitly. +.P +An empty line or the end of the file signals the end of the subject lines for a +test, at which point a new pattern or command line is expected if there is +still input to be read. +. +. +.SH "COMMAND LINES" +.rs +.sp +In between sets of test data, a line that begins with # is interpreted as a +command line. 
If the first character is followed by white space or an +exclamation mark, the line is treated as a comment, and ignored. Otherwise, the +following commands are recognized: +.sp + #forbid_utf +.sp +Subsequent patterns automatically have the PCRE2_NEVER_UTF and PCRE2_NEVER_UCP +options set, which locks out the use of the PCRE2_UTF and PCRE2_UCP options and +the use of (*UTF) and (*UCP) at the start of patterns. This command also forces +an error if a subsequent pattern contains any occurrences of \eP, \ep, or \eX, +which are still supported when PCRE2_UTF is not set, but which require Unicode +property support to be included in the library. +.P +This is a trigger guard that is used in test files to ensure that UTF or +Unicode property tests are not accidentally added to files that are used when +Unicode support is not included in the library. Setting PCRE2_NEVER_UTF and +PCRE2_NEVER_UCP as a default can also be obtained by the use of \fB#pattern\fP; +the difference is that \fB#forbid_utf\fP cannot be unset, and the automatic +options are not displayed in pattern information, to avoid cluttering up test +output. +.sp + #load +.sp +This command is used to load a set of precompiled patterns from a file, as +described in the section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +.sp + #loadtables +.sp +This command is used to load a set of binary character tables that can be +accessed by the tables=3 qualifier. Such tables can be created by the +\fBpcre2_dftables\fP program with the -b option. +.sp + #newline_default [] +.sp +When PCRE2 is built, a default newline convention can be specified. This +determines which characters and/or character pairs are recognized as indicating +a newline in a pattern or subject string. The default can be overridden when a +pattern is compiled. 
The standard test files contain tests of various newline +conventions, but the majority of the tests expect a single linefeed to be +recognized as a newline by default. Without special action the tests would fail +when PCRE2 is compiled with either CR or CRLF as the default newline. +.P +The #newline_default command specifies a list of newline types that are +acceptable as the default. The types must be one of CR, LF, CRLF, ANYCRLF, +ANY, or NUL (in upper or lower case), for example: +.sp + #newline_default LF Any anyCRLF +.sp +If the default newline is in the list, this command has no effect. Otherwise, +except when testing the POSIX API, a \fBnewline\fP modifier that specifies the +first newline convention in the list (LF in the above example) is added to any +pattern that does not already have a \fBnewline\fP modifier. If the newline +list is empty, the feature is turned off. This command is present in a number +of the standard test input files. +.P +When the POSIX API is being tested there is no way to override the default +newline convention, though it is possible to set the newline convention from +within the pattern. A warning is given if the \fBposix\fP or \fBposix_nosub\fP +modifier is used when \fB#newline_default\fP would set a default for the +non-POSIX API. +.sp + #pattern +.sp +This command sets a default modifier list that applies to all subsequent +patterns. Modifiers on a pattern can change these settings. +.sp + #perltest +.sp +This line is used in test files that can also be processed by \fBperltest.sh\fP +to confirm that Perl gives the same results as PCRE2. Subsequent tests are +checked for the use of \fBpcre2test\fP features that are incompatible with the +\fBperltest.sh\fP script. +.P +Patterns must use '/' as their delimiter, and only certain modifiers are +supported. Comment lines, #pattern commands, and #subject commands that set or +unset "mark" are recognized and acted on. 
The #perltest, #forbid_utf, and +#newline_default commands, which are needed in the relevant pcre2test files, +are silently ignored. All other command lines are ignored, but give a warning +message. The \fB#perltest\fP command helps detect tests that are accidentally +put in the wrong file or use the wrong delimiter. For more details of the +\fBperltest.sh\fP script see the comments it contains. +.sp + #pop [] + #popcopy [] +.sp +These commands are used to manipulate the stack of compiled patterns, as +described in the section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +.sp + #save +.sp +This command is used to save a set of compiled patterns to a file, as described +in the section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +.sp + #subject +.sp +This command sets a default modifier list that applies to all subsequent +subject lines. Modifiers on a subject line can change these settings. +. +. +.SH "MODIFIER SYNTAX" +.rs +.sp +Modifier lists are used with both pattern and subject lines. Items in a list +are separated by commas followed by optional white space. Trailing whitespace +in a modifier list is ignored. Some modifiers may be given for both patterns +and subject lines, whereas others are valid only for one or the other. Each +modifier has a long name, for example "anchored", and some of them must be +followed by an equals sign and a value, for example, "offset=12". Values cannot +contain comma characters, but may contain spaces. Modifiers that do not take +values may be preceded by a minus sign to turn off a previous setting. +.P +A few of the more common modifiers can also be specified as single letters, for +example "i" for "caseless". In documentation, following the Perl convention, +these are written with a slash ("the /i modifier") for clarity. Abbreviated +modifiers must all be concatenated in the first item of a modifier list. 
If the +first item is not recognized as a long modifier name, it is interpreted as a +sequence of these abbreviations. For example: +.sp + /abc/ig,newline=cr,jit=3 +.sp +This is a pattern line whose modifier list starts with two one-letter modifiers +(/i and /g). The lower-case abbreviated modifiers are the same as used in Perl. +. +. +.SH "PATTERN SYNTAX" +.rs +.sp +A pattern line must start with one of the following characters (common symbols, +excluding pattern meta-characters): +.sp + / ! " ' ` - = _ : ; , % & @ ~ +.sp +This is interpreted as the pattern's delimiter. A regular expression may be +continued over several input lines, in which case the newline characters are +included within it. It is possible to include the delimiter as a literal within +the pattern by escaping it with a backslash, for example +.sp + /abc\e/def/ +.sp +If you do this, the escape and the delimiter form part of the pattern, but +since the delimiters are all non-alphanumeric, the inclusion of the backslash +does not affect the pattern's interpretation. Note, however, that this trick +does not work within \eQ...\eE literal bracketing because the backslash will +itself be interpreted as a literal. If the terminating delimiter is immediately +followed by a backslash, for example, +.sp + /abc/\e +.sp +a backslash is added to the end of the pattern. This is done to provide a way +of testing the error condition that arises if a pattern finishes with a +backslash, because +.sp + /abc\e/ +.sp +is interpreted as the first line of a pattern that starts with "abc/", causing +pcre2test to read the next line as a continuation of the regular expression. +.P +A pattern can be followed by a modifier list (details below). +. +. 
+.SH "SUBJECT LINE SYNTAX" +.rs +.sp +Before each subject line is passed to \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP, leading and trailing white +space is removed, and the line is scanned for backslash escapes, unless the +\fBsubject_literal\fP modifier was set for the pattern. The following provide a +means of encoding non-printing characters in a visible way: +.sp + \ea alarm (BEL, \ex07) + \eb backspace (\ex08) + \ee escape (\ex1b) + \ef form feed (\ex0c) + \en newline (\ex0a) + \eN{U+hh...} unicode character (any number of hex digits) + \er carriage return (\ex0d) + \et tab (\ex09) + \ev vertical tab (\ex0b) + \eddd octal number (up to 3 octal digits); represents a single + code point unless larger than 255 with the 8-bit library + \eo{dd...} octal number (any number of octal digits) representing a + character in UTF mode or a code point + \exhh hexadecimal byte (up to 2 hex digits) + \ex{hh...} hexadecimal number (up to 8 hex digits) representing a + character in UTF mode or a code point +.sp +Invoking \eN{U+hh...} or \ex{hh...} doesn't require the use of the \fButf\fP +modifier on the pattern. It is always recognized. There may be any number of +hexadecimal digits inside the braces; invalid values provoke error messages +but when using \eN{U+hh...} with some invalid unicode characters they will +be accepted with a warning instead. +.P +Note that even in UTF-8 mode, \exhh (and depending on how large, \eddd) +describe one byte rather than one character; this makes it possible to +construct invalid UTF-8 sequences for testing purposes. On the other hand, +\ex{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only generating +more than one byte if the value is greater than 127. To avoid the ambiguity +it is preferred to use \eN{U+hh...} when describing characters. When testing +the 8-bit library not in UTF-8 mode, \ex{hh} generates one byte for values +that could fit on it, and causes an error for greater values. 
+.P +When testing the 16-bit library, not in UTF-16 mode, all 4-digit \ex{hhhh} +values are accepted. This makes it possible to construct invalid UTF-16 +sequences for testing purposes. +.P +When testing the 32-bit library, not in UTF-32 mode, all 4 to 8-digit \ex{...} +values are accepted. This makes it possible to construct invalid UTF-32 +sequences for testing purposes. +.P +There is a special backslash sequence that specifies replication of one or more +characters: +.sp + \e[]{} +.sp +This makes it possible to test long strings without having to provide them as +part of the file. For example: +.sp + \e[abc]{4} +.sp +is converted to "abcabcabcabc". This feature does not support nesting. To +include a closing square bracket in the characters, code it as \ex5D. +.P +A backslash followed by an equals sign marks the end of the subject string and +the start of a modifier list. For example: +.sp + abc\e=notbol,notempty +.sp +If the subject string is empty and \e= is followed by whitespace, the line is +treated as a comment line, and is not used for matching. For example: +.sp + \e= This is a comment. + abc\e= This is an invalid modifier list. +.sp +A backslash followed by any other non-alphanumeric character just escapes that +character. A backslash followed by anything else causes an error. However, if +the very last character in the line is a backslash (and there is no modifier +list), it is ignored. This gives a way of passing an empty line as data, since +a real empty line terminates the data input. +.P +If the \fBsubject_literal\fP modifier is set for a pattern, all subject lines +that follow are treated as literals, with no special treatment of backslashes. +No replication is possible, and any subject modifiers must be set as defaults +by a \fB#subject\fP command. +. +. +.SH "PATTERN MODIFIERS" +.rs +.sp +There are several types of modifier that can appear in pattern lines. Except +where noted below, they may also be used in \fB#pattern\fP commands. 
A +pattern's modifier list can add to or override default modifiers that were set +by a previous \fB#pattern\fP command. +. +. +.\" HTML +.SS "Setting compilation options" +.rs +.sp +The following modifiers set options for \fBpcre2_compile()\fP. Most of them set +bits in the options argument of that function, but those whose names start with +PCRE2_EXTRA are additional options that are set in the compile context. +Some of these options have single-letter abbreviations. There is special +handling for /x: if a second x is present, PCRE2_EXTENDED is converted into +PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well, +though this makes no difference to the way \fBpcre2_compile()\fP behaves. See +.\" HREF +\fBpcre2api\fP +.\" +for a description of the effects of these options. +.sp + allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS + allow_lookaround_bsk set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK + allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + alt_bsux set PCRE2_ALT_BSUX + alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_extended_class set PCRE2_ALT_EXTENDED_CLASS + alt_verbnames set PCRE2_ALT_VERBNAMES + anchored set PCRE2_ANCHORED + /a ascii_all set all ASCII options + ascii_bsd set PCRE2_EXTRA_ASCII_BSD + ascii_bss set PCRE2_EXTRA_ASCII_BSS + ascii_bsw set PCRE2_EXTRA_ASCII_BSW + ascii_digit set PCRE2_EXTRA_ASCII_DIGIT + ascii_posix set PCRE2_EXTRA_ASCII_POSIX + auto_callout set PCRE2_AUTO_CALLOUT + bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + /i caseless set PCRE2_CASELESS + /r caseless_restrict set PCRE2_EXTRA_CASELESS_RESTRICT + dollar_endonly set PCRE2_DOLLAR_ENDONLY + /s dotall set PCRE2_DOTALL + dupnames set PCRE2_DUPNAMES + endanchored set PCRE2_ENDANCHORED + escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF + /x extended set PCRE2_EXTENDED + /xx extended_more set PCRE2_EXTENDED_MORE + extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX + firstline set PCRE2_FIRSTLINE + literal set PCRE2_LITERAL + match_line set 
PCRE2_EXTRA_MATCH_LINE + match_invalid_utf set PCRE2_MATCH_INVALID_UTF + match_unset_backref set PCRE2_MATCH_UNSET_BACKREF + match_word set PCRE2_EXTRA_MATCH_WORD + /m multiline set PCRE2_MULTILINE + never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_callout set PCRE2_EXTRA_NEVER_CALLOUT + never_ucp set PCRE2_NEVER_UCP + never_utf set PCRE2_NEVER_UTF + /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE + no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_bs0 set PCRE2_EXTRA_NO_BS0 + no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR + no_start_optimize set PCRE2_NO_START_OPTIMIZE + no_utf_check set PCRE2_NO_UTF_CHECK + python_octal set PCRE2_EXTRA_PYTHON_OCTAL + turkish_casing set PCRE2_EXTRA_TURKISH_CASING + ucp set PCRE2_UCP + ungreedy set PCRE2_UNGREEDY + use_offset_limit set PCRE2_USE_OFFSET_LIMIT + utf set PCRE2_UTF +.sp +As well as turning on the PCRE2_UTF option, the \fButf\fP modifier causes all +non-printing characters in output strings to be printed using the \ex{hh...} +notation. Otherwise, those less than 0x100 are output in hex without the curly +brackets. Setting \fButf\fP in 16-bit or 32-bit mode also causes pattern and +subject strings to be translated to UTF-16 or UTF-32, respectively, before +being passed to library functions. +.sp +The following modifiers enable or disable performance optimizations by +calling \fBpcre2_set_optimize()\fP before invoking the regex compiler. +.sp + optimization_full enable all optional optimizations + optimization_none disable all optional optimizations + auto_possess auto-possessify variable quantifiers + auto_possess_off don't auto-possessify variable quantifiers + dotstar_anchor anchor patterns starting with .* + dotstar_anchor_off don't anchor patterns starting with .* + start_optimize enable pre-scan of subject string + start_optimize_off disable pre-scan of subject string +.sp +See the +.\" HREF +\fBpcre2_set_optimize\fP +.\" +documentation for details on these optimizations. +. +. 
+.\" HTML +.SS "Setting compilation controls" +.rs +.sp +The following modifiers affect the compilation process or request information +about the pattern. There are single-letter abbreviations for some that are +heavily used in the test files. +.sp + /B bincode show binary code without lengths + bsr=[anycrlf|unicode] specify \eR handling + callout_info show callout information + convert= request foreign pattern conversion + convert_glob_escape=c set glob escape character + convert_glob_separator=c set glob separator character + convert_length set convert buffer length + debug same as info,fullbincode + expand expand repetition syntax in pattern + framesize show matching frame size + fullbincode show binary code with lengths + /I info show info about compiled pattern + hex unquoted characters are hexadecimal + jit[=] use JIT + jitfast use JIT fast path + jitverify verify JIT use + locale= use this locale + max_pattern_compiled ) set maximum compiled pattern + _length= ) length (bytes) + max_pattern_length= set maximum pattern length (code units) + max_varlookbehind= set maximum variable lookbehind length + memory show memory used + newline= set newline type + null_context compile with a NULL context + null_pattern pass pattern as NULL + parens_nest_limit= set maximum parentheses depth + posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB + push push compiled pattern onto the stack + pushcopy push a copy onto the stack + pushtablescopy push a copy with tables onto the stack + stackguard= test the stackguard feature + subject_literal treat all subject lines as literal + tables=[0|1|2|3] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8 +.sp +The effects of these modifiers are described in the following sections. +. +. +.SS "Newline and \eR handling" +.rs +.sp +The \fBbsr\fP modifier specifies what \eR in a pattern should match. If it is +set to "anycrlf", \eR matches CR, LF, or CRLF only. 
If it is set to "unicode", +\eR matches any Unicode newline sequence. The default can be specified when +PCRE2 is built; if it is not, the default is set to Unicode. +.P +The \fBnewline\fP modifier specifies which characters are to be interpreted as +newlines, both in the pattern and in subject lines. The type must be one of CR, +LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). +. +. +.SS "Information about a pattern" +.rs +.sp +The \fBdebug\fP modifier is a shorthand for \fBinfo,fullbincode\fP, requesting +all available information. +.P +The \fBbincode\fP modifier causes a representation of the compiled code to be +output after compilation. This information does not contain length and offset +values, which ensures that the same output is generated for different internal +link sizes and different code unit widths. By using \fBbincode\fP, the same +regression tests can be used in different environments. +.P +The \fBfullbincode\fP modifier, by contrast, \fIdoes\fP include length and +offset values. This is used in a few special tests that run only for specific +code unit widths and link sizes, and is also useful for one-off tests. +.P +The \fBinfo\fP modifier requests information about the compiled pattern +(whether it is anchored, has a fixed first character, and so on). The +information is obtained from the \fBpcre2_pattern_info()\fP function. Here are +some typical examples: +.sp + re> /(?i)(^a|^b)/m,info + Capture group count = 1 + Compile options: multiline + Overall options: caseless multiline + First code unit at start or follows newline + Subject length lower bound = 1 +.sp + re> /(?i)abc/info + Capture group count = 0 + Compile options: + Overall options: caseless + First code unit = 'a' (caseless) + Last code unit = 'c' (caseless) + Subject length lower bound = 3 +.sp +"Compile options" are those specified by modifiers; "overall options" have +added options that are taken or deduced from the pattern. 
If both sets of +options are the same, just a single "options" line is output; if there are no +options, the line is omitted. "First code unit" is where any match must start; +if there is more than one they are listed as "starting code units". "Last code +unit" is the last literal code unit that must be present in any match. This is +not necessarily the last character. These lines are omitted if no starting or +ending code units are recorded. The subject length line is omitted when +\fBno_start_optimize\fP is set because the minimum length is not calculated +when it can never be used. +.P +The \fBframesize\fP modifier shows the size, in bytes, of each storage frame +used by \fBpcre2_match()\fP for handling backtracking. The size depends on the +number of capturing parentheses in the pattern. A vector of these frames is +used at matching time; its overall size is shown when the \fBheapframes_size\fP +subject modifier is set. +.P +The \fBcallout_info\fP modifier requests information about all the callouts in +the pattern. A list of them is output at the end of any other information that +is requested. For each callout, either its number or string is given, followed +by the item that follows it in the pattern. +. +. +.SS "Passing a NULL context" +.rs +.sp +Normally, \fBpcre2test\fP passes a context block to \fBpcre2_compile()\fP. If +the \fBnull_context\fP modifier is set, however, NULL is passed. This is for +testing that \fBpcre2_compile()\fP behaves correctly in this case (it uses +default values). +. +. +.SS "Passing a NULL pattern" +.rs +.sp +The \fBnull_pattern\fP modifier is for testing the behaviour of +\fBpcre2_compile()\fP when the pattern argument is NULL. The length value +passed is the default PCRE2_ZERO_TERMINATED unless \fBuse_length\fP is set. +Any length other than zero causes an error. +. +. 
+.SS "Specifying pattern characters in hexadecimal" +.rs +.sp +The \fBhex\fP modifier specifies that the characters of the pattern, except for +substrings enclosed in single or double quotes, are to be interpreted as pairs +of hexadecimal digits. This feature is provided as a way of creating patterns +that contain binary zeros and other non-printing characters. White space is +permitted between pairs of digits. For example, this pattern contains three +characters: +.sp + /ab 32 59/hex +.sp +Parts of such a pattern are taken literally if quoted. This pattern contains +nine characters, only two of which are specified in hexadecimal: +.sp + /ab "literal" 32/hex +.sp +Either single or double quotes may be used. There is no way of including +the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are +mutually exclusive. +. +. +.SS "Specifying the pattern's length" +.rs +.sp +By default, patterns are passed to the compiling functions as zero-terminated +strings but can be passed by length instead of being zero-terminated. The +\fBuse_length\fP modifier causes this to happen. Using a length happens +automatically (whether or not \fBuse_length\fP is set) when \fBhex\fP is set, +because patterns specified in hexadecimal may contain binary zeros. +.P +If \fBhex\fP or \fBuse_length\fP is used with the POSIX wrapper API (see +.\" HTML +.\" +"Using the POSIX wrapper API" +.\" +below), the REG_PEND extension is used to pass the pattern's length. +. +. +.SS "Specifying a maximum for variable lookbehinds" +.rs +.sp +Variable lookbehind assertions are supported only if, for each one, there is a +maximum length (in characters) that it can match. There is a limit on this, +whose default can be set at build time, with an ultimate default of 255. The +\fBmax_varlookbehind\fP modifier uses the \fBpcre2_set_max_varlookbehind()\fP +function to change the limit. Lookbehinds whose branches each match a fixed +length are limited to 65535 characters per branch. +. +. 
+.SS "Specifying wide characters in 16-bit and 32-bit modes" +.rs +.sp +In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and +translated to UTF-16 or UTF-32 when the \fButf\fP modifier is set. For testing +the 16-bit and 32-bit libraries in non-UTF mode, the \fButf8_input\fP modifier +can be used. It is mutually exclusive with \fButf\fP. Input lines are +interpreted as UTF-8 as a means of specifying wide characters. More details are +given in +.\" HTML +.\" +"Input encoding" +.\" +above. +. +. +.SS "Generating long repetitive patterns" +.rs +.sp +Some tests use long patterns that are very repetitive. Instead of creating a +very long input line for such a pattern, you can use a special repetition +feature, similar to the one described for subject lines above. If the +\fBexpand\fP modifier is present on a pattern, parts of the pattern that have +the form +.sp + \e[<characters>]{<count>} +.sp +are expanded before the pattern is passed to \fBpcre2_compile()\fP. For +example, \e[AB]{6000} is expanded to "ABAB..." 6000 times. This construction +cannot be nested. An initial "\e[" sequence is recognized only if "]{" followed +by decimal digits and "}" is found later in the pattern. If not, the characters +remain in the pattern unaltered. The \fBexpand\fP and \fBhex\fP modifiers are +mutually exclusive. +.P +If part of an expanded pattern looks like an expansion, but is really part of +the actual pattern, unwanted expansion can be avoided by giving two values in +the quantifier. For example, \e[AB]{6000,6000} is not recognized as an +expansion item. +.P +If the \fBinfo\fP modifier is set on an expanded pattern, the result of the +expansion is included in the information that is output. +. +. +.SS "JIT compilation" +.rs +.sp +Just-in-time (JIT) compiling is a heavyweight optimization that can greatly +speed up pattern matching. See the +.\" HREF +\fBpcre2jit\fP +.\" +documentation for details. 
JIT compiling happens, optionally, after a pattern +has been successfully compiled into an internal form. The JIT compiler converts +this to optimized machine code. It needs to know whether the match-time options +PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, because +different code is generated for the different cases. See the \fBpartial\fP +modifier in "Subject Modifiers" +.\" HTML +.\" +below +.\" +for details of how these options are specified for each match attempt. +.P +JIT compilation is requested by the \fBjit\fP pattern modifier, which may +optionally be followed by an equals sign and a number in the range 0 to 7. +The three bits that make up the number specify which of the three JIT operating +modes are to be compiled: +.sp + 1 compile JIT code for non-partial matching + 2 compile JIT code for soft partial matching + 4 compile JIT code for hard partial matching +.sp +The possible values for the \fBjit\fP modifier are therefore: +.sp + 0 disable JIT + 1 normal matching only + 2 soft partial matching only + 3 normal and soft partial matching + 4 hard partial matching only + 5 normal and hard partial matching + 6 soft and hard partial matching only + 7 all three modes +.sp +If no number is given, 7 is assumed. The phrase "partial matching" means a call +to \fBpcre2_match()\fP with either the PCRE2_PARTIAL_SOFT or the +PCRE2_PARTIAL_HARD option set. Note that such a call may return a complete +match; the options enable the possibility of a partial match, but do not +require it. Note also that if you request JIT compilation only for partial +matching (for example, jit=2) but do not set the \fBpartial\fP modifier on a +subject line, that match will not use JIT code because none was compiled for +non-partial matching. +.P +If JIT compilation is successful, the compiled JIT code will automatically be +used when an appropriate type of match is run, except when incompatible +run-time options are specified. For more details, see the +.\" HREF +\fBpcre2jit\fP +.\" +documentation. 
See also the \fBjitstack\fP modifier below for a way of +setting the size of the JIT stack. +.P +If the \fBjitfast\fP modifier is specified, matching is done using the JIT +"fast path" interface, \fBpcre2_jit_match()\fP, which skips some of the sanity +checks that are done by \fBpcre2_match()\fP, and of course does not work when +JIT is not supported. If \fBjitfast\fP is specified without \fBjit\fP, jit=7 is +assumed. +.P +If the \fBjitverify\fP modifier is specified, information about the compiled +pattern shows whether JIT compilation was or was not successful. If +\fBjitverify\fP is specified without \fBjit\fP, jit=7 is assumed. If JIT +compilation is successful when \fBjitverify\fP is set, the text "(JIT)" is +added to the first output line after a match or non match when JIT-compiled +code was actually used in the match. +. +. +.SS "Setting a locale" +.rs +.sp +The \fBlocale\fP modifier must specify the name of a locale, for example: +.sp + /pattern/locale=fr_FR +.sp +The given locale is set, \fBpcre2_maketables()\fP is called to build a set of +character tables for the locale, and this is then passed to +\fBpcre2_compile()\fP when compiling the regular expression. The same tables +are used when matching the following subject lines. The \fBlocale\fP modifier +applies only to the pattern on which it appears, but can be given in a +\fB#pattern\fP command if a default is needed. Setting a locale and alternate +character tables are mutually exclusive. +. +. +.SS "Showing pattern memory" +.rs +.sp +The \fBmemory\fP modifier causes the size in bytes of the memory used to hold +the compiled pattern to be output. This does not include the size of the +\fBpcre2_code\fP block; it is just the actual compiled data. If the pattern is +subsequently passed to the JIT compiler, the size of the JIT compiled code is +also output. Here is an example: +.sp + re> /a(b)c/jit,memory + Memory allocation (code space): 21 + Memory allocation (JIT code): 1910 +.sp +. +. 
+.SS "Limiting nested parentheses" +.rs +.sp +The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested +parentheses in a pattern. Breaching the limit causes a compilation error. +The default for the library is set when PCRE2 is built, but \fBpcre2test\fP +sets its own default of 220, which is required for running the standard test +suite. +. +. +.SS "Limiting the pattern length" +.rs +.sp +The \fBmax_pattern_length\fP modifier sets a limit, in code units, to the +length of pattern that \fBpcre2_compile()\fP will accept. Breaching the limit +causes a compilation error. The default is the largest number a PCRE2_SIZE +variable can hold (essentially unlimited). +. +. +.SS "Limiting the size of a compiled pattern" +.rs +.sp +The \fBmax_pattern_compiled_length\fP modifier sets a limit, in bytes, to the +amount of memory used by a compiled pattern. Breaching the limit causes a +compilation error. The default is the largest number a PCRE2_SIZE variable can +hold (essentially unlimited). +. +. +.\" HTML +.SS "Using the POSIX wrapper API" +.rs +.sp +The \fBposix\fP and \fBposix_nosub\fP modifiers cause \fBpcre2test\fP to call +PCRE2 via the POSIX wrapper API rather than its native API. When +\fBposix_nosub\fP is used, the POSIX option REG_NOSUB is passed to +\fBregcomp()\fP. The POSIX wrapper supports only the 8-bit library. Note that +it does not imply POSIX matching semantics; for more detail see the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. The following pattern modifiers set options for the +\fBregcomp()\fP function: +.sp + caseless REG_ICASE + multiline REG_NEWLINE + dotall REG_DOTALL ) + ungreedy REG_UNGREEDY ) These options are not part of + ucp REG_UCP ) the POSIX standard + utf REG_UTF8 ) +.sp +The \fBregerror_buffsize\fP modifier specifies a size for the error buffer that +is passed to \fBregerror()\fP in the event of a compilation error. 
For example: +.sp + /abc/posix,regerror_buffsize=20 +.sp +This provides a means of testing the behaviour of \fBregerror()\fP when the +buffer is too small for the error message. If this modifier has not been set, a +large buffer is used. +.P +The \fBaftertext\fP and \fBallaftertext\fP subject modifiers work as described +below. All other modifiers are either ignored, with a warning message, or cause +an error. +.P +The pattern is passed to \fBregcomp()\fP as a zero-terminated string by +default, but if the \fBuse_length\fP or \fBhex\fP modifiers are set, the +REG_PEND extension is used to pass it by length. +. +. +.SS "Testing the stack guard feature" +.rs +.sp +The \fBstackguard\fP modifier is used to test the use of +\fBpcre2_set_compile_recursion_guard()\fP, a function that is provided to +enable stack availability to be checked during compilation (see the +.\" HREF +\fBpcre2api\fP +.\" +documentation for details). If the number specified by the modifier is greater +than zero, \fBpcre2_set_compile_recursion_guard()\fP is called to set up +callback from \fBpcre2_compile()\fP to a local function. The argument it +receives is the current nesting parenthesis depth; if this is greater than the +value given by the modifier, non-zero is returned, causing the compilation to +be aborted. +. +. +.SS "Using alternative character tables" +.rs +.sp +The value specified for the \fBtables\fP modifier must be one of the digits 0, +1, 2, or 3. It causes a specific set of built-in character tables to be passed +to \fBpcre2_compile()\fP. This is used in the PCRE2 tests to check behaviour +with different character tables. 
The digit specifies the tables as follows: +.sp + 0 do not pass any special character tables + 1 the default ASCII tables, as distributed in + pcre2_chartables.c.dist + 2 a set of tables defining ISO 8859 characters + 3 a set of tables loaded by the #loadtables command +.sp +In tables 2, some characters whose codes are greater than 128 are identified as +letters, digits, spaces, etc. Tables 3 can be used only after a +\fB#loadtables\fP command has loaded them from a binary file. Setting alternate +character tables and a locale are mutually exclusive. +. +. +.SS "Setting certain match controls" +.rs +.sp +The following modifiers are really subject modifiers, and are described under +"Subject Modifiers" below. However, they may be included in a pattern's +modifier list, in which case they are applied to every subject line that is +processed with that pattern. These modifiers do not affect the compilation +process. +.sp + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allvector show the entire ovector + allusedtext show all consulted text + altglobal alternative global matching + /g global global matching + heapframes_size show match data heapframes size + jitstack= set size of JIT stack + mark show mark values + replace= specify a replacement string + startchar show starting character when relevant + substitute_callout use substitution callouts + substitute_case_callout use substitution case callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip= skip substitution + substitute_stop= skip substitution and following + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY +.sp +These modifiers 
may not appear in a \fB#pattern\fP command. If you want them as +defaults, set them in a \fB#subject\fP command. +. +. +.SS "Specifying literal subject lines" +.rs +.sp +If the \fBsubject_literal\fP modifier is present on a pattern, all the subject +lines that it matches are taken as literal strings, with no interpretation of +backslashes. It is not possible to set subject modifiers on such lines, but any +that are set as defaults by a \fB#subject\fP command are recognized. +. +. +.SS "Saving a compiled pattern" +.rs +.sp +When a pattern with the \fBpush\fP modifier is successfully compiled, it is +pushed onto a stack of compiled patterns, and \fBpcre2test\fP expects the next +line to contain a new pattern (or a command) instead of a subject line. This +facility is used when saving compiled patterns to a file, as described in the +section entitled "Saving and restoring compiled patterns" +.\" HTML +.\" +below. +.\" +If \fBpushcopy\fP is used instead of \fBpush\fP, a copy of the compiled +pattern is stacked, leaving the original as current, ready to match the +following input lines. This provides a way of testing the +\fBpcre2_code_copy()\fP function. +.\" +The \fBpush\fP and \fBpushcopy\fP modifiers are incompatible with compilation +modifiers such as \fBglobal\fP that act at match time. Any that are specified +are ignored (for the stacked copy), with a warning message, except for +\fBreplace\fP, which causes an error. Note that \fBjitverify\fP, which is +allowed, does not carry through to any subsequent matching that uses a stacked +pattern. +. +. +.SS "Testing foreign pattern conversion" +.rs +.sp +The experimental foreign pattern conversion functions in PCRE2 can be tested by +setting the \fBconvert\fP modifier. 
Its argument is a colon-separated list of +options, which set the equivalent option for the \fBpcre2_pattern_convert()\fP +function: +.sp + glob PCRE2_CONVERT_GLOB + glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR + glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR + posix_basic PCRE2_CONVERT_POSIX_BASIC + posix_extended PCRE2_CONVERT_POSIX_EXTENDED + unset Unset all options +.sp +The "unset" value is useful for turning off a default that has been set by a +\fB#pattern\fP command. When one of these options is set, the input pattern is +passed to \fBpcre2_pattern_convert()\fP. If the conversion is successful, the +result is reflected in the output and then passed to \fBpcre2_compile()\fP. The +normal \fButf\fP and \fBno_utf_check\fP options, if set, cause the +PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be passed to +\fBpcre2_pattern_convert()\fP. +.P +By default, the conversion function is allowed to allocate a buffer for its +output. However, if the \fBconvert_length\fP modifier is set to a value greater +than zero, \fBpcre2test\fP passes a buffer of the given length. This makes it +possible to test the length check. +.P +The \fBconvert_glob_escape\fP and \fBconvert_glob_separator\fP modifiers can be +used to specify the escape and separator characters for glob processing, +overriding the defaults, which are operating-system dependent. +. +. +.\" HTML +.SH "SUBJECT MODIFIERS" +.rs +.sp +The modifiers that can appear in subject lines and the \fB#subject\fP +command are of two types. +. +. +.SS "Setting match options" +.rs +.sp +The following modifiers set options for \fBpcre2_match()\fP or +\fBpcre2_dfa_match()\fP. See +.\" HREF +\fBpcre2api\fP +.\" +for a description of their effects. 
+.sp + anchored set PCRE2_ANCHORED + copy_matched_subject set PCRE2_COPY_MATCHED_SUBJECT + endanchored set PCRE2_ENDANCHORED + dfa_restart set PCRE2_DFA_RESTART + dfa_shortest set PCRE2_DFA_SHORTEST + disable_recurseloop_check set PCRE2_DISABLE_RECURSELOOP_CHECK + no_jit set PCRE2_NO_JIT + no_utf_check set PCRE2_NO_UTF_CHECK + notbol set PCRE2_NOTBOL + notempty set PCRE2_NOTEMPTY + notempty_atstart set PCRE2_NOTEMPTY_ATSTART + noteol set PCRE2_NOTEOL + partial_hard (or ph) set PCRE2_PARTIAL_HARD + partial_soft (or ps) set PCRE2_PARTIAL_SOFT +.sp +The partial matching modifiers are provided with abbreviations because they +appear frequently in tests. +.P +If the \fBposix\fP or \fBposix_nosub\fP modifier was present on the pattern, +causing the POSIX wrapper API to be used, the only option-setting modifiers +that have any effect are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, +causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to +\fBregexec()\fP. The other modifiers are ignored, with a warning message. +.P +There is one additional modifier that can be used with the POSIX wrapper. It is +ignored (with a warning) if used for non-POSIX matching. +.sp + posix_startend=<n>[:<m>] +.sp +This causes the subject string to be passed to \fBregexec()\fP using the +REG_STARTEND option, which uses offsets to specify which part of the string is +searched. If only one number is given, the end offset is passed as the end of +the subject string. For more detail of REG_STARTEND, see the +.\" HREF +\fBpcre2posix\fP +.\" +documentation. If the subject string contains binary zeros (coded as escapes +such as \ex{00} because \fBpcre2test\fP does not support actual binary zeros in +its input), you must use \fBposix_startend\fP to specify its length. +. +. +.SS "Setting match controls" +.rs +.sp +The following modifiers affect the matching process or request additional +information. 
Some of them may also be specified on a pattern line (see above), +in which case they apply to every subject line that is matched against that +pattern, but can be overridden by modifiers on the subject. +.sp + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text (non-JIT only) + allvector show the entire ovector + altglobal alternative global matching + callout_capture show captures at callout time + callout_data= set a value to pass via callouts + callout_error=[:] control callout error + callout_extra show extra callout information + callout_fail=[:] control callout failure + callout_no_where do not show position of a callout + callout_none do not supply a callout function + copy= copy captured substring + depth_limit= set a depth limit + dfa use \fBpcre2_dfa_match()\fP + find_limits find heap, match and depth limits + find_limits_noheap find match and depth limits + get= extract captured substring + getall extract all captured substrings + /g global global matching + heapframes_size show match data heapframes size + heap_limit= set a limit on heap memory (Kbytes) + jitstack= set size of JIT stack + mark show mark values + match_limit= set a match limit + memory show heap memory usage + null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject + offset= set starting offset + offset_limit= set offset limit + ovector= set size of output vector + recursion_limit= obsolete synonym for depth_limit + replace= specify a replacement string + startchar show startchar when relevant + startoffset= same as offset= + substitute_callout use substitution callouts + substitute_case_callout use substitution case callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use 
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip= skip substitution number n + substitute_stop= skip substitution number n and greater + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + zero_terminate pass the subject as zero-terminated +.sp +The effects of these modifiers are described in the following sections. When +matching via the POSIX wrapper API, the \fBaftertext\fP, \fBallaftertext\fP, +and \fBovector\fP subject modifiers work as described below. All other +modifiers are either ignored, with a warning message, or cause an error. +. +. +.SS "Showing more text" +.rs +.sp +The \fBaftertext\fP modifier requests that as well as outputting the part of +the subject string that matched the entire pattern, \fBpcre2test\fP should in +addition output the remainder of the subject string. This is useful for tests +where the subject contains multiple copies of the same substring. The +\fBallaftertext\fP modifier requests the same action for captured substrings as +well as the main matched substring. In each case the remainder is output on the +following line with a plus character following the capture number. +.P +The \fBallusedtext\fP modifier requests that all the text that was consulted +during a successful pattern match by the interpreter should be shown, for both +full and partial matches. This feature is not supported for JIT matching, and +if requested with JIT it is ignored (with a warning message). Setting this +modifier affects the output if there is a lookbehind at the start of a match, +or, for a complete match, a lookahead at the end, or if \eK is used in the +pattern. Characters that precede or follow the start and end of the actual +match are indicated in the output by '<' or '>' characters underneath them. 
+Here is an example: +.sp + re> /(?<=pqr)abc(?=xyz)/ + data> 123pqrabcxyz456\e=allusedtext + 0: pqrabcxyz + <<< >>> + data> 123pqrabcxy\e=ph,allusedtext + Partial match: pqrabcxy + <<< +.sp +The first, complete match shows that the matched string is "abc", with the +preceding and following strings "pqr" and "xyz" having been consulted during +the match (when processing the assertions). The partial match can indicate only +the preceding string. +.P +The \fBstartchar\fP modifier requests that the starting character for the match +be indicated, if it is different to the start of the matched string. The only +time when this occurs is when \eK has been processed as part of the match. In +this situation, the output for the matched string is displayed from the +starting character instead of from the match point, with circumflex characters +under the earlier characters. For example: +.sp + re> /abc\eKxyz/ + data> abcxyz\e=startchar + 0: abcxyz + ^^^ +.sp +Unlike \fBallusedtext\fP, the \fBstartchar\fP modifier can be used with JIT. +However, these two modifiers are mutually exclusive. +. +. +.SS "Showing the value of all capture groups" +.rs +.sp +The \fBallcaptures\fP modifier requests that the values of all potential +captured parentheses be output after a match. By default, only those up to the +highest one actually used in the match are output (corresponding to the return +code from \fBpcre2_match()\fP). Groups that did not take part in the match +are output as "<unset>". This modifier is not relevant for DFA matching (which +does no capturing) and does not apply when \fBreplace\fP is specified; it is +ignored, with a warning message, if present. +. +. +.SS "Showing the entire ovector, for all outcomes" +.rs +.sp +The \fBallvector\fP modifier requests that the entire ovector be shown, +whatever the outcome of the match. 
Compare \fBallcaptures\fP, which shows only +up to the maximum number of capture groups for the pattern, and then only for a +successful complete non-DFA match. This modifier, which acts after any match +result, and also for DFA matching, provides a means of checking that there are +no unexpected modifications to ovector fields. Before each match attempt, the +ovector is filled with a special value, and if this is found in both elements +of a capturing pair, "<unchanged>" is output. After a successful match, this +applies to all groups after the maximum capture group for the pattern. In other +cases it applies to the entire ovector. After a partial match, the first two +elements are the only ones that should be set. After a DFA match, the amount of +ovector that is used depends on the number of matches that were found. +. +. +.SS "Testing pattern callouts" +.rs +.sp +A callout function is supplied when \fBpcre2test\fP calls the library matching +functions, unless \fBcallout_none\fP is specified. Its behaviour can be +controlled by various modifiers listed above whose names begin with +\fBcallout_\fP. Details are given in the section entitled "Callouts" +.\" HTML +.\" +below. +.\" +Testing callouts from \fBpcre2_substitute()\fP is described separately in +"Testing the substitution function" +.\" HTML +.\" +below. +.\" +. +. +.SS "Finding all matches in a string" +.rs +.sp +Searching for all possible matches within a subject can be requested by the +\fBglobal\fP or \fBaltglobal\fP modifier. After finding a match, the matching +function is called again to search the remainder of the subject. The difference +between \fBglobal\fP and \fBaltglobal\fP is that the former uses the +\fIstart_offset\fP argument to \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP +to start searching at a new point within the entire string (which is what Perl +does), whereas the latter passes over a shortened subject. 
This makes a +difference to the matching process if the pattern begins with a lookbehind +assertion (including \eb or \eB). +.P +If an empty string is matched, the next match is done with the +PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search for +another, non-empty, match at the same point in the subject. If this match +fails, the start offset is advanced, and the normal match is retried. This +imitates the way Perl handles such cases when using the \fB/g\fP modifier or +the \fBsplit()\fP function. Normally, the start offset is advanced by one +character, but if the newline convention recognizes CRLF as a newline, and the +current character is CR followed by LF, an advance of two characters occurs. +. +. +.SS "Testing substring extraction functions" +.rs +.sp +The \fBcopy\fP and \fBget\fP modifiers can be used to test the +\fBpcre2_substring_copy_xxx()\fP and \fBpcre2_substring_get_xxx()\fP functions. +They can be given more than once, and each can specify a capture group name or +number, for example: +.sp + abcd\e=copy=1,copy=3,get=G1 +.sp +If the \fB#subject\fP command is used to set default copy and/or get lists, +these can be unset by specifying a negative number to cancel all numbered +groups and an empty name to cancel all named groups. +.P +The \fBgetall\fP modifier tests \fBpcre2_substring_list_get()\fP, which +extracts all captured substrings. +.P +If the subject line is successfully matched, the substrings extracted by the +convenience functions are output with C, G, or L after the string number +instead of a colon. This is in addition to the normal full list. The string +length (that is, the return from the extraction function) is given in +parentheses after each substring, followed by the name when the extraction was +by name. +. +. 
+.\" HTML +.SS "Testing the substitution function" +.rs +.sp +If the \fBreplace\fP modifier is set, the \fBpcre2_substitute()\fP function is +called instead of one of the matching functions (or after one call of +\fBpcre2_match()\fP in the case of PCRE2_SUBSTITUTE_MATCHED). Note that +replacement strings cannot contain commas, because a comma signifies the end of +a modifier. This is not thought to be an issue in a test program. +.P +Specifying a completely empty replacement string disables this modifier. +However, it is possible to specify an empty replacement by providing a buffer +length, as described below, for an otherwise empty replacement. +.P +Unlike subject strings, \fBpcre2test\fP does not process replacement strings +for escape sequences. In UTF mode, a replacement string is checked to see if it +is a valid UTF-8 string. If so, it is correctly converted to a UTF string of +the appropriate code unit width. If it is not a valid UTF-8 string, the +individual code units are copied directly. This provides a means of passing an +invalid UTF-8 string for testing purposes. +.P +The following modifiers set options (in addition to the normal match options) +for \fBpcre2_substitute()\fP: +.sp + global PCRE2_SUBSTITUTE_GLOBAL + substitute_extended PCRE2_SUBSTITUTE_EXTENDED + substitute_literal PCRE2_SUBSTITUTE_LITERAL + substitute_matched PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY +.sp +See the +.\" HREF +\fBpcre2api\fP +.\" +documentation for details of these options. +.P +After a successful substitution, the modified string is output, preceded by the +number of replacements. This may be zero if there were no matches. 
Here is a +simple example of a substitution test: +.sp + /abc/replace=xxx + =abc=abc= + 1: =xxx=abc= + =abc=abc=\e=global + 2: =xxx=xxx= +.sp +Subject and replacement strings should be kept relatively short (fewer than 256 +characters) for substitution tests, as fixed-size buffers are used. To make it +easy to test for buffer overflow, if the replacement string starts with a +number in square brackets, that number is passed to \fBpcre2_substitute()\fP as +the size of the output buffer, with the replacement string starting at the next +character. Here is an example that tests the edge case: +.sp + /abc/ + 123abc123\e=replace=[10]XYZ + 1: 123XYZ123 + 123abc123\e=replace=[9]XYZ + Failed: error -47: no more memory +.sp +The default action of \fBpcre2_substitute()\fP is to return +PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the +PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the +\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues +to go through the motions of matching and substituting (but not doing any +callouts), in order to compute the size of buffer that is required. When this +happens, \fBpcre2test\fP shows the required buffer length (which includes space +for the trailing zero) as part of the error message. For example: +.sp + /abc/substitute_overflow_length + 123abc123\e=replace=[9]XYZ + Failed: error -47: no more memory: 10 code units are needed +.sp +A replacement string is ignored with POSIX and DFA matching. Specifying partial +matching provokes an error return ("bad option value") from +\fBpcre2_substitute()\fP. +. +. +.SS "Testing substitute callouts" +.rs +.sp +If the \fBsubstitute_callout\fP modifier is set, a substitution callout +function is set up. The \fBnull_context\fP modifier must not be set, because +the address of the callout function is passed in a match context. When the +callout function is called (after each substitution), details of the input +and output strings are output. 
For example: +.sp + /abc/g,replace=<$0>,substitute_callout + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc>" + 2(1) Old 6 9 "abc" New 8 13 "<abc>" + 2: <abc>def<abc>pqr +.sp +The first number on each callout line is the count of matches. The +parenthesized number is the number of pairs that are set in the ovector (that +is, one more than the number of capturing groups that were set). Then are +listed the offsets of the old substring, its contents, and the same for the +replacement. +.P +By default, the substitution callout function returns zero, which accepts the +replacement and causes matching to continue if /g was used. Two further +modifiers can be used to test other return values. If \fBsubstitute_skip\fP is +set to a value greater than zero the callout function returns +1 for the match +of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the +replacement to be rejected, and -1 causes no further matching to take place. If +either of them is set, \fBsubstitute_callout\fP is assumed. For example: +.sp + /abc/g,replace=<$0>,substitute_skip=1 + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED" + 2(1) Old 6 9 "abc" New 6 11 "<abc>" + 2: abcdef<abc>pqr + abcdefabcpqr\e=substitute_stop=1 + 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED" + 1: abcdefabcpqr +.sp +If both are set for the same number, stop takes precedence. Only a single skip +or stop is supported, which is sufficient for testing that the feature works. +. +. +.SS "Testing substitute case callouts" +.rs +.sp +If the \fBsubstitute_case_callout\fP modifier is set, a substitution +case callout function is set up. The callout function is called for each +substituted chunk which is to be case-transformed. +.P +The callout function passed is a fixed function with implementation for certain +behaviours: inputs which shrink when case-transformed; inputs which grow; inputs +with distinct upper/lower/titlecase forms. 
The characters which are not +special-cased for testing purposes are left unmodified, as if they are caseless +characters. +. +. +.SS "Setting the JIT stack size" +.rs +.sp +The \fBjitstack\fP modifier provides a way of setting the maximum stack size +that is used by the just-in-time optimization code. It is ignored if JIT +optimization is not being used. The value is a number of kibibytes (units of +1024 bytes). Setting zero reverts to the default of 32KiB. Providing a stack +that is larger than the default is necessary only for very complicated +patterns. If \fBjitstack\fP is set non-zero on a subject line it overrides any +value that was set on the pattern. +. +. +.SS "Setting heap, match, and depth limits" +.rs +.sp +The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set +the appropriate limits in the match context. These values are ignored when the +\fBfind_limits\fP or \fBfind_limits_noheap\fP modifier is specified. +. +. +.SS "Finding minimum limits" +.rs +.sp +If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP +calls the relevant matching function several times, setting different values in +the match context via \fBpcre2_set_heap_limit()\fP, +\fBpcre2_set_match_limit()\fP, or \fBpcre2_set_depth_limit()\fP until it finds +the smallest value for each parameter that allows the match to complete without +a "limit exceeded" error. The match itself may succeed or fail. An alternative +modifier, \fBfind_limits_noheap\fP, omits the heap limit. This is used in the +standard tests, because the minimum heap limit varies between systems. If JIT +is being used, only the match limit is relevant, and the other two are +automatically omitted. +.P +When using this modifier, the pattern should not contain any limit settings +such as (*LIMIT_MATCH=...) within it. If such a setting is present and is +lower than the minimum matching value, the minimum value cannot be found +because \fBpcre2_set_match_limit()\fP etc. 
are only able to reduce the value of +an in-pattern limit; they cannot increase it. +.P +For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how +much nested backtracking happens (that is, how deeply the pattern's tree is +searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of +recursive calls of the internal function that is used for handling pattern +recursion, lookaround assertions, and atomic groups. +.P +For non-DFA matching, the \fImatch_limit\fP number is a measure of the amount +of backtracking that takes place, and learning the minimum value can be +instructive. For most simple matches, the number is quite small, but for +patterns with very large numbers of matching possibilities, it can become large +very quickly with increasing length of subject string. In the case of DFA +matching, \fImatch_limit\fP controls the total number of calls, both recursive +and non-recursive, to the internal matching function, thus controlling the +overall amount of computing resource that is used. +.P +For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes +(units of 1024 bytes), limits the amount of heap memory used for matching. +. +. +.SS "Showing MARK names" +.rs +.sp +.P +The \fBmark\fP modifier causes the names from backtracking control verbs that +are returned from calls to \fBpcre2_match()\fP to be displayed. If a mark is +returned for a match, non-match, or partial match, \fBpcre2test\fP shows it. +For a match, it is on a line by itself, tagged with "MK:". Otherwise, it +is added to the non-match message. +. +. +.SS "Showing memory usage" +.rs +.sp +The \fBmemory\fP modifier causes \fBpcre2test\fP to log the sizes of all heap +memory allocation and freeing calls that occur during a call to +\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. 
In the latter case, heap memory +is used only when a match requires more internal workspace than the default +allocation on the stack, so in many cases there will be no output. No heap +memory is allocated during matching with JIT. For this modifier to work, the +\fBnull_context\fP modifier must not be set on both the pattern and the +subject, though it can be set on one or the other. +. +. +.SS "Showing the heap frame overall vector size" +.rs +.sp +The \fBheapframes_size\fP modifier is relevant for matches using +\fBpcre2_match()\fP without JIT. After a match has run (whether successful or +not) the size, in bytes, of the allocated heap frames vector that is left +attached to the match data block is shown. If the matching action involved +several calls to \fBpcre2_match()\fP (for example, global matching or for +timing) only the final value is shown. +.P +This modifier is ignored, with a warning, for POSIX or DFA matching. JIT +matching does not use the heap frames vector, so the size is always zero, +unless there was a previous non-JIT match. Note that specifying a size of zero +for the output vector (see below) causes \fBpcre2test\fP to free its match data +block (and associated heap frames vector) and allocate a new one. +. +. +.SS "Setting a starting offset" +.rs +.sp +The \fBoffset\fP modifier sets an offset in the subject string at which +matching starts. Its value is a number of code units, not characters. +. +. +.SS "Setting an offset limit" +.rs +.sp +The \fBoffset_limit\fP modifier sets a limit for unanchored matches. If a match +cannot be found starting at or before this offset in the subject, a "no match" +return is given. The data value is a number of code units, not characters. When +this modifier is used, the \fBuse_offset_limit\fP modifier must have been set +for the pattern; if not, an error is generated. +. +. 
+.SS "Setting the size of the output vector" +.rs +.sp +The \fBovector\fP modifier applies only to the subject line in which it +appears, though of course it can also be used to set a default in a +\fB#subject\fP command. It specifies the number of pairs of offsets that are +available for storing matching information. The default is 15. +.P +A value of zero is useful when testing the POSIX API because it causes +\fBregexec()\fP to be called with a NULL capture vector. When not testing the +POSIX API, a value of zero is used to cause +\fBpcre2_match_data_create_from_pattern()\fP to be called, in order to create a +new match block of exactly the right size for the pattern. (It is not possible +to create a match block with a zero-length ovector; there is always at least +one pair of offsets.) The old match data block is freed. +. +. +.SS "Passing the subject as zero-terminated" +.rs +.sp +By default, the subject string is passed to a native API matching function with +its correct length. In order to test the facility for passing a zero-terminated +string, the \fBzero_terminate\fP modifier is provided. It causes the length to +be passed as PCRE2_ZERO_TERMINATED. When matching via the POSIX interface, +this modifier is ignored, with a warning. +.P +When testing \fBpcre2_substitute()\fP, this modifier also has the effect of +passing the replacement string as zero-terminated. +. +. +.SS "Passing a NULL context, subject, or replacement" +.rs +.sp +Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP, +\fBpcre2_dfa_match()\fP, \fBpcre2_jit_match()\fP or \fBpcre2_substitute()\fP. +If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for +testing that the matching and substitution functions behave correctly in this +case (they use default values). This modifier cannot be used with the +\fBfind_limits\fP, \fBfind_limits_noheap\fP, or \fBsubstitute_callout\fP +modifiers. 
+.P +Similarly, for testing purposes, if the \fBnull_subject\fP or +\fBnull_replacement\fP modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. +. +. +.SH "THE ALTERNATIVE MATCHING FUNCTION" +.rs +.sp +By default, \fBpcre2test\fP uses the standard PCRE2 matching function, +\fBpcre2_match()\fP to match each subject line. PCRE2 also supports an +alternative matching function, \fBpcre2_dfa_match()\fP, which operates in a +different way, and has some restrictions. The differences between the two +functions are described in the +.\" HREF +\fBpcre2matching\fP +.\" +documentation. +.P +If the \fBdfa\fP modifier is set, the alternative matching function is used. +This function finds all possible matches at a given point in the subject. If, +however, the \fBdfa_shortest\fP modifier is set, processing stops after the +first match is found. This is always the shortest possible match. +. +. +.SH "DEFAULT OUTPUT FROM pcre2test" +.rs +.sp +This section describes the output when the normal matching function, +\fBpcre2_match()\fP, is being used. +.P +When a match succeeds, \fBpcre2test\fP outputs the list of captured substrings, +starting with number 0 for the string that matched the whole pattern. +Otherwise, it outputs "No match" when the return is PCRE2_ERROR_NOMATCH, or +"Partial match:" followed by the partially matching substring when the +return is PCRE2_ERROR_PARTIAL. (Note that this is the +entire substring that was inspected during the partial match; it may include +characters before the actual match start if a lookbehind assertion, \eK, \eb, +or \eB was involved.) +.P +For any other return, \fBpcre2test\fP outputs the PCRE2 negative error number +and a short descriptive phrase. If the error is a failed UTF string check, the +code unit offset of the start of the failing character is also output. Here is +an example of an interactive \fBpcre2test\fP run. 
+.sp + $ pcre2test + PCRE2 version 10.22 2016-07-29 +.sp + re> /^abc(\ed+)/ + data> abc123 + 0: abc123 + 1: 123 + data> xyz + No match +.sp +Unset capturing substrings that are not followed by one that is set are not +shown by \fBpcre2test\fP unless the \fBallcaptures\fP modifier is specified. In +the following example, there are two capturing substrings, but when the first +data line is matched, the second, unset substring is not shown. An "internal" +unset substring is shown as "<unset>", as for the second data line. +.sp + re> /(a)|(b)/ + data> a + 0: a + 1: a + data> b + 0: b + 1: <unset> + 2: b +.sp +If the strings contain any non-printing characters, they are output as \exhh +escapes if the value is less than 256 and UTF mode is not set. Otherwise they +are output as \ex{hh...} escapes. See below for the definition of non-printing +characters. If the \fBaftertext\fP modifier is set, the output for substring 0 +is followed by the rest of the subject string, identified by "0+" like this: +.sp + re> /cat/aftertext + data> cataract + 0: cat + 0+ aract +.sp +If global matching is requested, the results of successive matching attempts +are output in sequence, like this: +.sp + re> /\eBi(\ew\ew)/g + data> Mississippi + 0: iss + 1: ss + 0: iss + 1: ss + 0: ipp + 1: pp +.sp +"No match" is output only if the first match attempt fails. Here is an example +of a failure message (the offset 4 that is specified by the \fBoffset\fP +modifier is past the end of the subject string): +.sp + re> /xyz/ + data> xyz\e=offset=4 + Error -24 (bad offset value) +.P +Note that whereas patterns can be continued over several lines (a plain ">" +prompt is used for continuations), subject lines may not. However newlines can +be included in a subject by means of the \en escape (or \er, \er\en, etc., +depending on the newline sequence setting). +. +. +. 
+.SH "OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION" +.rs +.sp +When the alternative matching function, \fBpcre2_dfa_match()\fP, is used, the +output consists of a list of all the matches that start at the first point in +the subject where there is at least one match. For example: +.sp + re> /(tang|tangerine|tan)/ + data> yellow tangerine\e=dfa + 0: tangerine + 1: tang + 2: tan +.sp +Using the normal matching function on this data finds only "tang". The +longest matching string is always given first (and numbered zero). After a +PCRE2_ERROR_PARTIAL return, the output is "Partial match:", followed by the +partially matching substring. Note that this is the entire substring that was +inspected during the partial match; it may include characters before the actual +match start if a lookbehind assertion, \eb, or \eB was involved. (\eK is not +supported for DFA matching.) +.P +If global matching is requested, the search for further matches resumes +at the end of the longest match. For example: +.sp + re> /(tang|tangerine|tan)/g + data> yellow tangerine and tangy sultana\e=dfa + 0: tangerine + 1: tang + 2: tan + 0: tang + 1: tan + 0: tan +.sp +The alternative matching function does not support substring capture, so the +modifiers that are concerned with captured substrings are not relevant. +. +. +.SH "RESTARTING AFTER A PARTIAL MATCH" +.rs +.sp +When the alternative matching function has given the PCRE2_ERROR_PARTIAL +return, indicating that the subject partially matched the pattern, you can +restart the match with additional subject data by means of the +\fBdfa_restart\fP modifier. For example: +.sp + re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/ + data> 23ja\e=ps,dfa + Partial match: 23ja + data> n05\e=dfa,dfa_restart + 0: n05 +.sp +For further information about partial matching, see the +.\" HREF +\fBpcre2partial\fP +.\" +documentation. +. +. 
+.\" HTML +.SH CALLOUTS +.rs +.sp +If the pattern contains any callout requests, \fBpcre2test\fP's callout +function is called during matching unless \fBcallout_none\fP is specified. This +works with both matching functions, and with JIT, though there are some +differences in behaviour. The output for callouts with numerical arguments and +those with string arguments is slightly different. +. +. +.SS "Callouts with numerical arguments" +.rs +.sp +By default, the callout function displays the callout number, the start and +current positions in the subject text at the callout time, and the next pattern +item to be tested. For example: +.sp + --->pqrabcdef + 0 ^ ^ \ed +.sp +This output indicates that callout number 0 occurred for a match attempt +starting at the fourth character of the subject string, when the pointer was at +the seventh character, and when the next pattern item was \ed. Just +one circumflex is output if the start and current positions are the same, or if +the current position precedes the start position, which can happen if the +callout is in a lookbehind assertion. +.P +Callouts numbered 255 are assumed to be automatic callouts, inserted as a +result of the \fBauto_callout\fP pattern modifier. In this case, instead of +showing the callout number, the offset in the pattern, preceded by a plus, is +output. For example: +.sp + re> /\ed?[A-E]\e*/auto_callout + data> E* + --->E* + +0 ^ \ed? + +3 ^ [A-E] + +8 ^^ \e* + +10 ^ ^ + 0: E* +.sp +If a pattern contains (*MARK) items, an additional line is output whenever +a change of latest mark is passed to the callout function. For example: +.sp + re> /a(*MARK:X)bc/auto_callout + data> abc + --->abc + +0 ^ a + +1 ^^ (*MARK:X) + +10 ^^ b + Latest Mark: X + +11 ^ ^ c + +12 ^ ^ + 0: abc +.sp +The mark changes between matching "a" and "b", but stays the same for the rest +of the match, so nothing more is output. If, as a result of backtracking, the +mark reverts to being unset, the text "<unset>" is output. +. +. 
+.SS "Callouts with string arguments" +.rs +.sp +The output for a callout with a string argument is similar, except that instead +of outputting a callout number before the position indicators, the callout +string and its offset in the pattern string are output before the reflection of +the subject string, and the subject string is reflected for each callout. For +example: +.sp + re> /^ab(?C'first')cd(?C"second")ef/ + data> abcdefg + Callout (7): 'first' + --->abcdefg + ^ ^ c + Callout (20): "second" + --->abcdefg + ^ ^ e + 0: abcdef +.sp +. +. +.SS "Callout modifiers" +.rs +.sp +The callout function in \fBpcre2test\fP returns zero (carry on matching) by +default, but you can use a \fBcallout_fail\fP modifier in a subject line to +change this and other parameters of the callout (see below). +.P +If the \fBcallout_capture\fP modifier is set, the current captured groups are +output when a callout occurs. This is useful only for non-DFA matching, as +\fBpcre2_dfa_match()\fP does not support capturing, so no captures are ever +shown. +.P +The normal callout output, showing the callout number or pattern offset (as +described above) is suppressed if the \fBcallout_no_where\fP modifier is set. +.P +When using the interpretive matching function \fBpcre2_match()\fP without JIT, +setting the \fBcallout_extra\fP modifier causes additional output from +\fBpcre2test\fP's callout function to be generated. For the first callout in a +match attempt at a new starting position in the subject, "New match attempt" is +output. If there has been a backtrack since the last callout (or start of +matching if this is the first callout), "Backtrack" is output, followed by "No +other matching paths" if the backtrack ended the previous match attempt. 
For +example: +.sp + re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess + data> aac\e=callout_extra + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^ ^ ) + +4 ^ ^ b + Backtrack + --->aac + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + No match +.sp +Notice that various optimizations must be turned off if you want all possible +matching paths to be scanned. If \fBno_start_optimize\fP is not used, there is +an immediate "no match", without any callouts, because the starting +optimization fails to find "b" in the subject, which it knows must be present +for any match. If \fBno_auto_possess\fP is not used, the "a+" item is turned +into "a++", which reduces the number of backtracks. +.P +The \fBcallout_extra\fP modifier has no effect if used with the DFA matching +function, or with JIT. +. +. +.SS "Return values from callouts" +.rs +.sp +The default return from the callout function is zero, which allows matching to +continue. The \fBcallout_fail\fP modifier can be given one or two numbers. If +there is only one number, 1 is returned instead of 0 (causing matching to +backtrack) when a callout of that number is reached. If two numbers (<n>:<m>) +are given, 1 is returned when callout <n> is reached and there have been at +least <m> callouts. The \fBcallout_error\fP modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +\fBcallout_error\fP takes precedence. Note that callouts with string arguments +are always given the number zero. +.P +The \fBcallout_data\fP modifier can be given an unsigned or a negative number. 
+This is set as the "user data" that is passed to the matching function, and +passed back when the callout function is invoked. Any value other than zero is +used as a return from \fBpcre2test\fP's callout function. +.P +Inserting callouts can be helpful when using \fBpcre2test\fP to check +complicated regular expressions. For further information about callouts, see +the +.\" HREF +\fBpcre2callout\fP +.\" +documentation. +. +. +. +.SH "NON-PRINTING CHARACTERS" +.rs +.sp +When \fBpcre2test\fP is outputting text in the compiled version of a pattern, +bytes other than 32-126 are always treated as non-printing characters and are +therefore shown as hex escapes. +.P +When \fBpcre2test\fP is outputting text that is a matched part of a subject +string, it behaves in the same way, unless a different locale has been set for +the pattern (using the \fBlocale\fP modifier). In this case, the +\fBisprint()\fP function is used to distinguish printing and non-printing +characters. +. +. +. +.\" HTML +.SH "SAVING AND RESTORING COMPILED PATTERNS" +.rs +.sp +It is possible to save compiled patterns on disc or elsewhere, and reload them +later, subject to a number of restrictions. JIT data cannot be saved. The host +on which the patterns are reloaded must be running the same version of PCRE2, +with the same code unit width, and must also have the same endianness, pointer +width and PCRE2_SIZE type. Before compiled patterns can be saved they must be +serialized, that is, converted to a stream of bytes. A single byte stream may +contain any number of compiled patterns, but they must all use the same +character tables. A single copy of the tables is included in the byte stream +(its size is 1088 bytes). +.P +The functions whose names begin with \fBpcre2_serialize_\fP are used +for serializing and de-serializing. They are described in the +.\" HREF +\fBpcre2serialize\fP +.\" +documentation. 
In this section we describe the features of \fBpcre2test\fP that +can be used to test these functions. +.P +Note that "serialization" in PCRE2 does not convert compiled patterns to an +abstract format like Java or .NET. It just makes a reloadable byte code stream. +Hence the restrictions on reloading mentioned above. +.P +In \fBpcre2test\fP, when a pattern with \fBpush\fP modifier is successfully +compiled, it is pushed onto a stack of compiled patterns, and \fBpcre2test\fP +expects the next line to contain a new pattern (or command) instead of a +subject line. By contrast, the \fBpushcopy\fP modifier causes a copy of the +compiled pattern to be stacked, leaving the original available for immediate +matching. By using \fBpush\fP and/or \fBpushcopy\fP, a number of patterns can +be compiled and retained. These modifiers are incompatible with \fBposix\fP, +and control modifiers that act at match time are ignored (with a message) for +the stacked patterns. The \fBjitverify\fP modifier applies only at compile +time. +.P +The command +.sp + #save <filename> +.sp +causes all the stacked patterns to be serialized and the result written to the +named file. Afterwards, all the stacked patterns are freed. The command +.sp + #load <filename> +.sp +reads the data in the file, and then arranges for it to be de-serialized, with +the resulting compiled patterns added to the pattern stack. The pattern on the +top of the stack can be retrieved by the #pop command, which must be followed +by lines of subjects that are to be matched with the pattern, terminated as +usual by an empty line or end of file. This command may be followed by a +modifier list containing only +.\" HTML +.\" +control modifiers +.\" +that act after a pattern has been compiled. In particular, \fBhex\fP, +\fBposix\fP, \fBposix_nosub\fP, \fBpush\fP, and \fBpushcopy\fP are not allowed, +nor are any +.\" HTML +.\" +option-setting modifiers. +.\" +The JIT modifiers are, however, permitted. 
Here is an example that saves and +reloads two patterns. +.sp + /abc/push + /xyz/push + #save tempfile + #load tempfile + #pop info + xyz +.sp + #pop jit,bincode + abc +.sp +If \fBjitverify\fP is used with #pop, it does not automatically imply +\fBjit\fP, which is different behaviour from when it is used on a pattern. +.P +The #popcopy command is analogous to the \fBpushcopy\fP modifier in that it +makes current a copy of the topmost stack pattern, leaving the original still +on the stack. +. +. +. +.SH "SEE ALSO" +.rs +.sp +\fBpcre2\fP(3), \fBpcre2api\fP(3), \fBpcre2callout\fP(3), +\fBpcre2jit\fP(3), \fBpcre2matching\fP(3), \fBpcre2partial\fP(3), +\fBpcre2pattern\fP(3), \fBpcre2serialize\fP(3). +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 26 December 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/doc/pcre2test.txt b/3rd/pcre2/doc/pcre2test.txt new file mode 100644 index 00000000..b6574b2e --- /dev/null +++ b/3rd/pcre2/doc/pcre2test.txt @@ -0,0 +1,2068 @@ +PCRE2TEST(1) General Commands Manual PCRE2TEST(1) + + +NAME + pcre2test - a program for testing Perl-compatible regular expressions. + + +SYNOPSIS + + pcre2test [options] [input file [output file]] + + pcre2test is a test program for the PCRE2 regular expression libraries, + but it can also be used for experimenting with regular expressions. + This document describes the features of the test program; for details + of the regular expressions themselves, see the pcre2pattern documenta- + tion. For details of the PCRE2 library function calls and their op- + tions, see the pcre2api documentation. + + The input for pcre2test is a sequence of regular expression patterns + and subject strings to be matched. There are also command lines for + setting defaults and controlling some special actions. The output shows + the result of each match attempt. 
Modifiers on external or internal + command lines, the patterns, and the subject lines specify PCRE2 func- + tion options, control how the subject is processed, and what output is + produced. + + There are many obscure modifiers, some of which are specifically de- + signed for use in conjunction with the test script and data files that + are distributed as part of PCRE2. All the modifiers are documented + here, some without much justification, but many of them are unlikely to + be of use except when testing the libraries. + + +PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES + + Different versions of the PCRE2 library can be built to support charac- + ter strings that are encoded in 8-bit, 16-bit, or 32-bit code units. + One, two, or all three of these libraries may be simultaneously in- + stalled. The pcre2test program can be used to test all the libraries. + However, its own input and output are always in 8-bit format. When + testing the 16-bit or 32-bit libraries, patterns and subject strings + are converted to 16-bit or 32-bit format before being passed to the li- + brary functions. Results are converted back to 8-bit code units for + output. + + In the rest of this document, the names of library functions and struc- + tures are given in generic form, for example, pcre2_compile(). The ac- + tual names used in the libraries have a suffix _8, _16, or _32, as ap- + propriate. + + +INPUT ENCODING + + Input to pcre2test is processed line by line, either by calling the C + library's fgets() function, or via the libreadline or libedit library. + In some Windows environments character 26 (hex 1A) causes an immediate + end of file, and no further data is read, so this character should be + avoided unless you really want that action. + + The input is processed using C's string functions, so must not contain + binary zeros, even though in Unix-like environments, fgets() treats any + bytes other than newline as data characters. 
An error is generated if a + binary zero is encountered. By default subject lines are processed for + backslash escapes, which makes it possible to include any data value in + strings that are passed to the library for matching. For patterns, + there is a facility for specifying some or all of the 8-bit input char- + acters as hexadecimal pairs, which makes it possible to include binary + zeros. + + Input for the 16-bit and 32-bit libraries + + When testing the 16-bit or 32-bit libraries, there is a need to be able + to generate character code points greater than 255 in the strings that + are passed to the library. For subject lines and some patterns, back- + slash escapes can be used. In addition, when the utf modifier (see + "Setting compilation options" below) is set, the pattern and any fol- + lowing subject lines are interpreted as UTF-8 strings and translated to + UTF-16 or UTF-32 as appropriate. + + For non-UTF testing of wide characters, the utf8_input modifier can be + used. This is mutually exclusive with utf, and is allowed only in + 16-bit or 32-bit mode. It causes the pattern and following subject + lines to be treated as UTF-8 according to the original definition (RFC + 2279), which allows for character values up to 0x7fffffff. Each charac- + ter is placed in one 16-bit or 32-bit code unit (in the 16-bit case, + values greater than 0xffff cause an error to occur). + + UTF-8 (in its original definition) is not capable of encoding values + greater than 0x7fffffff, but such values can be handled by the 32-bit + library. When testing this library in non-UTF mode with utf8_input set, + if any character is preceded by the byte 0xff (which is an invalid byte + in UTF-8) 0x80000000 is added to the character's value. For subject + strings, using an escape sequence is preferable. + + +COMMAND LINE OPTIONS + + -8 If the 8-bit library has been built, this option causes it to + be used (this is the default). 
If the 8-bit library has not + been built, this option causes an error. + + -16 If the 16-bit library has been built, this option causes it + to be used. If the 8-bit library has not been built, this is + the default. If the 16-bit library has not been built, this + option causes an error. + + -32 If the 32-bit library has been built, this option causes it + to be used. If no other library has been built, this is the + default. If the 32-bit library has not been built, this op- + tion causes an error. + + -ac Behave as if each pattern has the auto_callout modifier, that + is, insert automatic callouts into every pattern that is com- + piled. + + -AC As for -ac, but in addition behave as if each subject line + has the callout_extra modifier, that is, show additional in- + formation from callouts. + + -b Behave as if each pattern has the fullbincode modifier; the + full internal binary form of the pattern is output after com- + pilation. + + -C Output the version number of the PCRE2 library, and all + available information about the optional features that are + included, and then exit with zero exit code. All other op- + tions are ignored. If both -C and -LM are present, whichever + is first is recognized. + + -C option Output information about a specific build-time option, then + exit. This functionality is intended for use in scripts such + as RunTest. 
The following options output the value and set + the exit code as indicated: + + ebcdic-nl the code for LF (= NL) in an EBCDIC environment: + either 0x15 or 0x25 + 0 if used in an ASCII/Unicode environment + exit code is always 0 + linksize the configured internal link size (2, 3, or 4) + exit code is set to the link size + newline the default newline setting: + CR, LF, CRLF, ANYCRLF, ANY, or NUL + exit code is always 0 + bsr the default setting for what \R matches: + ANYCRLF or ANY + exit code is always 0 + + The following options output 1 for true or 0 for false, and + set the exit code to the same value: + + backslash-C \C is supported (not locked out) + ebcdic compiled for an EBCDIC environment + jit just-in-time support is available + pcre2-16 the 16-bit library was built + pcre2-32 the 32-bit library was built + pcre2-8 the 8-bit library was built + unicode Unicode support is available + + Note that the availability of JIT support in the library does + not guarantee that it can actually be used because in some + environments it is unable to allocate executable memory. The + option "jitusable" gives more detailed information. It re- + turns one of the following values: + + 0 JIT is available and usable + 1 JIT is available but cannot allocate executable memory + 2 JIT is not available + 3 Unexpected return from test call to pcre2_jit_compile() + + If an unknown option is given, an error message is output; + the exit code is 0. + + -d Behave as if each pattern has the debug modifier; the inter- + nal form and information about the compiled pattern is output + after compilation; -d is equivalent to -b -i. + + -dfa Behave as if each subject line has the dfa modifier; matching + is done using the pcre2_dfa_match() function instead of the + default pcre2_match(). + + -error number[,number,...] 
+ Call pcre2_get_error_message() for each of the error numbers + in the comma-separated list, display the resulting messages + on the standard output, then exit with zero exit code. The + numbers may be positive or negative. This is a convenience + facility for PCRE2 maintainers. + + -help Output a brief summary of these options and then exit. + + -i Behave as if each pattern has the info modifier; information + about the compiled pattern is given after compilation. + + -jit Behave as if each pattern line has the jit modifier; after + successful compilation, each pattern is passed to the just- + in-time compiler, if available. + + -jitfast Behave as if each pattern line has the jitfast modifier; af- + ter successful compilation, each pattern is passed to the + just-in-time compiler, if available, and each subject line is + passed directly to the JIT matcher via its "fast path". + + -jitverify + Behave as if each pattern line has the jitverify modifier; + after successful compilation, each pattern is passed to the + just-in-time compiler, if available, and the use of JIT for + matching is verified. + + -LM List modifiers: write a list of available pattern and subject + modifiers to the standard output, then exit with zero exit + code. All other options are ignored. If both -C and any -Lx + options are present, whichever is first is recognized. + + -LP List properties: write a list of recognized Unicode proper- + ties to the standard output, then exit with zero exit code. + All other options are ignored. If both -C and any -Lx options + are present, whichever is first is recognized. + + -LS List scripts: write a list of recognized Unicode script names + to the standard output, then exit with zero exit code. All + other options are ignored. If both -C and any -Lx options are + present, whichever is first is recognized. + + -pattern modifier-list + Behave as if each pattern line contains the given modifiers. 
+ + -q Do not output the version number of pcre2test at the start of + execution. + + -S size On Unix-like systems, set the size of the run-time stack to + size mebibytes (units of 1024*1024 bytes). + + -subject modifier-list + Behave as if each subject line contains the given modifiers. + + -t Run each compile and match many times with a timer, and out- + put the resulting times per compile or match. When JIT is + used, separate times are given for the initial compile and + the JIT compile. You can control the number of iterations + that are used for timing by following -t with a number (as a + separate item on the command line). For example, "-t 1000" + iterates 1000 times. The default is to iterate 500,000 times. + + -tm This is like -t except that it times only the matching phase, + not the compile phase. + + -T -TM These behave like -t and -tm, but in addition, at the end of + a run, the total times for all compiles and matches are out- + put. + + -version Output the PCRE2 version number and then exit. + + +DESCRIPTION + + If pcre2test is given two filename arguments, it reads from the first + and writes to the second. If the first name is "-", input is taken from + the standard input. If pcre2test is given only one argument, it reads + from that file and writes to stdout. Otherwise, it reads from stdin and + writes to stdout. + + When pcre2test is built, a configuration option can specify that it + should be linked with the libreadline or libedit library. When this is + done, if the input is from a terminal, it is read using the readline() + function. This provides line-editing and history facilities. The output + from the -help option states whether or not readline() will be used. + + The program handles any number of tests, each of which consists of a + set of input lines. Each set starts with a regular expression pattern, + followed by any number of subject lines to be matched against that pat- + tern. 
In between sets of test data, command lines that begin with # may + appear. This file format, with some restrictions, can also be processed + by the perltest.sh script that is distributed with PCRE2 as a means of + checking that the behaviour of PCRE2 and Perl is the same. For a speci- + fication of perltest.sh, see the comments near its beginning. See also + the #perltest command below. + + When the input is a terminal, pcre2test prompts for each line of input, + using "re>" to prompt for regular expression patterns, and "data>" to + prompt for subject lines. Command lines starting with # can be entered + only in response to the "re>" prompt. + + Each subject line is matched separately and independently. If you want + to do multi-line matches, you have to use the \n escape sequence (or \r + or \r\n, etc., depending on the newline setting) in a single line of + input to encode the newline sequences. There is no limit on the length + of subject lines; the input buffer is automatically extended if it is + too small. There are replication features that make it possible to + generate long repetitive pattern or subject lines without having to + supply them explicitly. + + An empty line or the end of the file signals the end of the subject + lines for a test, at which point a new pattern or command line is ex- + pected if there is still input to be read. + + +COMMAND LINES + + In between sets of test data, a line that begins with # is interpreted + as a command line. If the first character is followed by white space or + an exclamation mark, the line is treated as a comment, and ignored. + Otherwise, the following commands are recognized: + + #forbid_utf + + Subsequent patterns automatically have the PCRE2_NEVER_UTF and + PCRE2_NEVER_UCP options set, which locks out the use of the PCRE2_UTF + and PCRE2_UCP options and the use of (*UTF) and (*UCP) at the start of + patterns. 
This command also forces an error if a subsequent pattern + contains any occurrences of \P, \p, or \X, which are still supported + when PCRE2_UTF is not set, but which require Unicode property support + to be included in the library. + + This is a trigger guard that is used in test files to ensure that UTF + or Unicode property tests are not accidentally added to files that are + used when Unicode support is not included in the library. Setting + PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as a default can also be obtained + by the use of #pattern; the difference is that #forbid_utf cannot be + unset, and the automatic options are not displayed in pattern informa- + tion, to avoid cluttering up test output. + + #load + + This command is used to load a set of precompiled patterns from a file, + as described in the section entitled "Saving and restoring compiled + patterns" below. + + #loadtables + + This command is used to load a set of binary character tables that can + be accessed by the tables=3 qualifier. Such tables can be created by + the pcre2_dftables program with the -b option. + + #newline_default [] + + When PCRE2 is built, a default newline convention can be specified. + This determines which characters and/or character pairs are recognized + as indicating a newline in a pattern or subject string. The default can + be overridden when a pattern is compiled. The standard test files con- + tain tests of various newline conventions, but the majority of the + tests expect a single linefeed to be recognized as a newline by de- + fault. Without special action the tests would fail when PCRE2 is com- + piled with either CR or CRLF as the default newline. + + The #newline_default command specifies a list of newline types that are + acceptable as the default. The types must be one of CR, LF, CRLF, ANY- + CRLF, ANY, or NUL (in upper or lower case), for example: + + #newline_default LF Any anyCRLF + + If the default newline is in the list, this command has no effect. 
Oth- + erwise, except when testing the POSIX API, a newline modifier that + specifies the first newline convention in the list (LF in the above ex- + ample) is added to any pattern that does not already have a newline + modifier. If the newline list is empty, the feature is turned off. This + command is present in a number of the standard test input files. + + When the POSIX API is being tested there is no way to override the de- + fault newline convention, though it is possible to set the newline con- + vention from within the pattern. A warning is given if the posix or + posix_nosub modifier is used when #newline_default would set a default + for the non-POSIX API. + + #pattern + + This command sets a default modifier list that applies to all subse- + quent patterns. Modifiers on a pattern can change these settings. + + #perltest + + This line is used in test files that can also be processed by perl- + test.sh to confirm that Perl gives the same results as PCRE2. Subse- + quent tests are checked for the use of pcre2test features that are in- + compatible with the perltest.sh script. + + Patterns must use '/' as their delimiter, and only certain modifiers + are supported. Comment lines, #pattern commands, and #subject commands + that set or unset "mark" are recognized and acted on. The #perltest, + #forbid_utf, and #newline_default commands, which are needed in the + relevant pcre2test files, are silently ignored. All other command lines + are ignored, but give a warning message. The #perltest command helps + detect tests that are accidentally put in the wrong file or use the + wrong delimiter. For more details of the perltest.sh script see the + comments it contains. + + #pop [] + #popcopy [] + + These commands are used to manipulate the stack of compiled patterns, + as described in the section entitled "Saving and restoring compiled + patterns" below. 
+ + #save + + This command is used to save a set of compiled patterns to a file, as + described in the section entitled "Saving and restoring compiled pat- + terns" below. + + #subject + + This command sets a default modifier list that applies to all subse- + quent subject lines. Modifiers on a subject line can change these set- + tings. + + +MODIFIER SYNTAX + + Modifier lists are used with both pattern and subject lines. Items in a + list are separated by commas followed by optional white space. Trailing + whitespace in a modifier list is ignored. Some modifiers may be given + for both patterns and subject lines, whereas others are valid only for + one or the other. Each modifier has a long name, for example "an- + chored", and some of them must be followed by an equals sign and a + value, for example, "offset=12". Values cannot contain comma charac- + ters, but may contain spaces. Modifiers that do not take values may be + preceded by a minus sign to turn off a previous setting. + + A few of the more common modifiers can also be specified as single let- + ters, for example "i" for "caseless". In documentation, following the + Perl convention, these are written with a slash ("the /i modifier") for + clarity. Abbreviated modifiers must all be concatenated in the first + item of a modifier list. If the first item is not recognized as a long + modifier name, it is interpreted as a sequence of these abbreviations. + For example: + + /abc/ig,newline=cr,jit=3 + + This is a pattern line whose modifier list starts with two one-letter + modifiers (/i and /g). The lower-case abbreviated modifiers are the + same as used in Perl. + + +PATTERN SYNTAX + + A pattern line must start with one of the following characters (common + symbols, excluding pattern meta-characters): + + / ! " ' ` - = _ : ; , % & @ ~ + + This is interpreted as the pattern's delimiter. 
A regular expression + may be continued over several input lines, in which case the newline + characters are included within it. It is possible to include the delim- + iter as a literal within the pattern by escaping it with a backslash, + for example + + /abc\/def/ + + If you do this, the escape and the delimiter form part of the pattern, + but since the delimiters are all non-alphanumeric, the inclusion of the + backslash does not affect the pattern's interpretation. Note, however, + that this trick does not work within \Q...\E literal bracketing because + the backslash will itself be interpreted as a literal. If the terminat- + ing delimiter is immediately followed by a backslash, for example, + + /abc/\ + + a backslash is added to the end of the pattern. This is done to provide + a way of testing the error condition that arises if a pattern finishes + with a backslash, because + + /abc\/ + + is interpreted as the first line of a pattern that starts with "abc/", + causing pcre2test to read the next line as a continuation of the regu- + lar expression. + + A pattern can be followed by a modifier list (details below). + + +SUBJECT LINE SYNTAX + + Before each subject line is passed to pcre2_match(), pcre2_dfa_match(), + or pcre2_jit_match(), leading and trailing white space is removed, and + the line is scanned for backslash escapes, unless the subject_literal + modifier was set for the pattern. 
The following provide a means of en- + coding non-printing characters in a visible way: + + \a alarm (BEL, \x07) + \b backspace (\x08) + \e escape (\x1b) + \f form feed (\x0c) + \n newline (\x0a) + \N{U+hh...} unicode character (any number of hex digits) + \r carriage return (\x0d) + \t tab (\x09) + \v vertical tab (\x0b) + \ddd octal number (up to 3 octal digits); represent a single + code point unless larger than 255 with the 8-bit li- + brary + \o{dd...} octal number (any number of octal digits) representing a + character in UTF mode or a code point + \xhh hexadecimal byte (up to 2 hex digits) + \x{hh...} hexadecimal number (up to 8 hex digits) representing a + character in UTF mode or a code point + + Invoking \N{U+hh...} or \x{hh...} doesn't require the use of the utf + modifier on the pattern. It is always recognized. There may be any num- + ber of hexadecimal digits inside the braces; invalid values provoke er- + ror messages but when using \N{U+hh...} with some invalid unicode char- + acters they will be accepted with a warning instead. + + Note that even in UTF-8 mode, \xhh (and depending on how large, \ddd) + describe one byte rather than one character; this makes it possible to + construct invalid UTF-8 sequences for testing purposes. On the other + hand, \x{hh...} is interpreted as a UTF-8 character in UTF-8 mode, only + generating more than one byte if the value is greater than 127. To + avoid the ambiguity it is preferred to use \N{U+hh...} when describing + characters. When testing the 8-bit library not in UTF-8 mode, \x{hh} + generates one byte for values that could fit on it, and causes an error + for greater values. + + When testing the 16-bit library, not in UTF-16 mode, all 4-digit + \x{hhhh} values are accepted. This makes it possible to construct in- + valid UTF-16 sequences for testing purposes. + + When testing the 32-bit library, not in UTF-32 mode, all 4 to 8-digit + \x{...} values are accepted. 
This makes it possible to construct in- + valid UTF-32 sequences for testing purposes. + + There is a special backslash sequence that specifies replication of one + or more characters: + + \[<characters>]{<count>} + + This makes it possible to test long strings without having to provide + them as part of the file. For example: + + \[abc]{4} + + is converted to "abcabcabcabc". This feature does not support nesting. + To include a closing square bracket in the characters, code it as \x5D. + + A backslash followed by an equals sign marks the end of the subject + string and the start of a modifier list. For example: + + abc\=notbol,notempty + + If the subject string is empty and \= is followed by whitespace, the + line is treated as a comment line, and is not used for matching. For + example: + + \= This is a comment. + abc\= This is an invalid modifier list. + + A backslash followed by any other non-alphanumeric character just es- + capes that character. A backslash followed by anything else causes an + error. However, if the very last character in the line is a backslash + (and there is no modifier list), it is ignored. This gives a way of + passing an empty line as data, since a real empty line terminates the + data input. + + If the subject_literal modifier is set for a pattern, all subject lines + that follow are treated as literals, with no special treatment of back- + slashes. No replication is possible, and any subject modifiers must be + set as defaults by a #subject command. + + +PATTERN MODIFIERS + + There are several types of modifier that can appear in pattern lines. + Except where noted below, they may also be used in #pattern commands. A + pattern's modifier list can add to or override default modifiers that + were set by a previous #pattern command. + + Setting compilation options + + The following modifiers set options for pcre2_compile(). 
Most of them + set bits in the options argument of that function, but those whose + names start with PCRE2_EXTRA are additional options that are set in the + compile context. Some of these options have single-letter abbrevia- + tions. There is special handling for /x: if a second x is present, + PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A + third appearance adds PCRE2_EXTENDED as well, though this makes no dif- + ference to the way pcre2_compile() behaves. See pcre2api for a descrip- + tion of the effects of these options. + + allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS + allow_lookaround_bsk set PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK + allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + alt_bsux set PCRE2_ALT_BSUX + alt_circumflex set PCRE2_ALT_CIRCUMFLEX + alt_extended_class set PCRE2_ALT_EXTENDED_CLASS + alt_verbnames set PCRE2_ALT_VERBNAMES + anchored set PCRE2_ANCHORED + /a ascii_all set all ASCII options + ascii_bsd set PCRE2_EXTRA_ASCII_BSD + ascii_bss set PCRE2_EXTRA_ASCII_BSS + ascii_bsw set PCRE2_EXTRA_ASCII_BSW + ascii_digit set PCRE2_EXTRA_ASCII_DIGIT + ascii_posix set PCRE2_EXTRA_ASCII_POSIX + auto_callout set PCRE2_AUTO_CALLOUT + bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + /i caseless set PCRE2_CASELESS + /r caseless_restrict set PCRE2_EXTRA_CASELESS_RESTRICT + dollar_endonly set PCRE2_DOLLAR_ENDONLY + /s dotall set PCRE2_DOTALL + dupnames set PCRE2_DUPNAMES + endanchored set PCRE2_ENDANCHORED + escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF + /x extended set PCRE2_EXTENDED + /xx extended_more set PCRE2_EXTENDED_MORE + extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX + firstline set PCRE2_FIRSTLINE + literal set PCRE2_LITERAL + match_line set PCRE2_EXTRA_MATCH_LINE + match_invalid_utf set PCRE2_MATCH_INVALID_UTF + match_unset_backref set PCRE2_MATCH_UNSET_BACKREF + match_word set PCRE2_EXTRA_MATCH_WORD + /m multiline set PCRE2_MULTILINE + never_backslash_c set PCRE2_NEVER_BACKSLASH_C + never_callout set 
PCRE2_EXTRA_NEVER_CALLOUT + never_ucp set PCRE2_NEVER_UCP + never_utf set PCRE2_NEVER_UTF + /n no_auto_capture set PCRE2_NO_AUTO_CAPTURE + no_auto_possess set PCRE2_NO_AUTO_POSSESS + no_bs0 set PCRE2_EXTRA_NO_BS0 + no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR + no_start_optimize set PCRE2_NO_START_OPTIMIZE + no_utf_check set PCRE2_NO_UTF_CHECK + python_octal set PCRE2_EXTRA_PYTHON_OCTAL + turkish_casing set PCRE2_EXTRA_TURKISH_CASING + ucp set PCRE2_UCP + ungreedy set PCRE2_UNGREEDY + use_offset_limit set PCRE2_USE_OFFSET_LIMIT + utf set PCRE2_UTF + + As well as turning on the PCRE2_UTF option, the utf modifier causes all + non-printing characters in output strings to be printed using the + \x{hh...} notation. Otherwise, those less than 0x100 are output in hex + without the curly brackets. Setting utf in 16-bit or 32-bit mode also + causes pattern and subject strings to be translated to UTF-16 or + UTF-32, respectively, before being passed to library functions. + + The following modifiers enable or disable performance optimizations by + calling pcre2_set_optimize() before invoking the regex compiler. + + optimization_full enable all optional optimizations + optimization_none disable all optional optimizations + auto_possess auto-possessify variable quantifiers + auto_possess_off don't auto-possessify variable quantifiers + dotstar_anchor anchor patterns starting with .* + dotstar_anchor_off don't anchor patterns starting with .* + start_optimize enable pre-scan of subject string + start_optimize_off disable pre-scan of subject string + + See the pcre2_set_optimize documentation for details on these optimiza- + tions. + + Setting compilation controls + + The following modifiers affect the compilation process or request in- + formation about the pattern. There are single-letter abbreviations for + some that are heavily used in the test files. 
+ + /B bincode show binary code without lengths + bsr=[anycrlf|unicode] specify \R handling + callout_info show callout information + convert= request foreign pattern conversion + convert_glob_escape=c set glob escape character + convert_glob_separator=c set glob separator character + convert_length set convert buffer length + debug same as info,fullbincode + expand expand repetition syntax in pattern + framesize show matching frame size + fullbincode show binary code with lengths + /I info show info about compiled pattern + hex unquoted characters are hexadecimal + jit[=] use JIT + jitfast use JIT fast path + jitverify verify JIT use + locale= use this locale + max_pattern_compiled ) set maximum compiled pattern + _length= ) length (bytes) + max_pattern_length= set maximum pattern length (code units) + max_varlookbehind= set maximum variable lookbehind length + memory show memory used + newline= set newline type + null_context compile with a NULL context + null_pattern pass pattern as NULL + parens_nest_limit= set maximum parentheses depth + posix use the POSIX API + posix_nosub use the POSIX API with REG_NOSUB + push push compiled pattern onto the stack + pushcopy push a copy onto the stack + pushtablescopy push a copy with tables onto the stack + stackguard= test the stackguard feature + subject_literal treat all subject lines as literal + tables=[0|1|2|3] select internal tables + use_length do not zero-terminate the pattern + utf8_input treat input as UTF-8 + + The effects of these modifiers are described in the following sections. + + Newline and \R handling + + The bsr modifier specifies what \R in a pattern should match. If it is + set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to + "unicode", \R matches any Unicode newline sequence. The default can be + specified when PCRE2 is built; if it is not, the default is set to Uni- + code. 
+ + The newline modifier specifies which characters are to be interpreted + as newlines, both in the pattern and in subject lines. The type must be + one of CR, LF, CRLF, ANYCRLF, ANY, or NUL (in upper or lower case). + + Information about a pattern + + The debug modifier is a shorthand for info,fullbincode, requesting all + available information. + + The bincode modifier causes a representation of the compiled code to be + output after compilation. This information does not contain length and + offset values, which ensures that the same output is generated for dif- + ferent internal link sizes and different code unit widths. By using + bincode, the same regression tests can be used in different environ- + ments. + + The fullbincode modifier, by contrast, does include length and offset + values. This is used in a few special tests that run only for specific + code unit widths and link sizes, and is also useful for one-off tests. + + The info modifier requests information about the compiled pattern + (whether it is anchored, has a fixed first character, and so on). The + information is obtained from the pcre2_pattern_info() function. Here + are some typical examples: + + re> /(?i)(^a|^b)/m,info + Capture group count = 1 + Compile options: multiline + Overall options: caseless multiline + First code unit at start or follows newline + Subject length lower bound = 1 + + re> /(?i)abc/info + Capture group count = 0 + Compile options: + Overall options: caseless + First code unit = 'a' (caseless) + Last code unit = 'c' (caseless) + Subject length lower bound = 3 + + "Compile options" are those specified by modifiers; "overall options" + have added options that are taken or deduced from the pattern. If both + sets of options are the same, just a single "options" line is output; + if there are no options, the line is omitted. "First code unit" is + where any match must start; if there is more than one they are listed + as "starting code units". 
"Last code unit" is the last literal code + unit that must be present in any match. This is not necessarily the + last character. These lines are omitted if no starting or ending code + units are recorded. The subject length line is omitted when + no_start_optimize is set because the minimum length is not calculated + when it can never be used. + + The framesize modifier shows the size, in bytes, of each storage frame + used by pcre2_match() for handling backtracking. The size depends on + the number of capturing parentheses in the pattern. A vector of these + frames is used at matching time; its overall size is shown when the + heapframes_size subject modifier is set. + + The callout_info modifier requests information about all the callouts + in the pattern. A list of them is output at the end of any other infor- + mation that is requested. For each callout, either its number or string + is given, followed by the item that follows it in the pattern. + + Passing a NULL context + + Normally, pcre2test passes a context block to pcre2_compile(). If the + null_context modifier is set, however, NULL is passed. This is for + testing that pcre2_compile() behaves correctly in this case (it uses + default values). + + Passing a NULL pattern + + The null_pattern modifier is for testing the behaviour of pcre2_com- + pile() when the pattern argument is NULL. The length value passed is + the default PCRE2_ZERO_TERMINATED unless use_length is set. Any length + other than zero causes an error. + + Specifying pattern characters in hexadecimal + + The hex modifier specifies that the characters of the pattern, except + for substrings enclosed in single or double quotes, are to be inter- + preted as pairs of hexadecimal digits. This feature is provided as a + way of creating patterns that contain binary zeros and other non-print- + ing characters. White space is permitted between pairs of digits. 
For + example, this pattern contains three characters: + + /ab 32 59/hex + + Parts of such a pattern are taken literally if quoted. This pattern + contains nine characters, only two of which are specified in hexadeci- + mal: + + /ab "literal" 32/hex + + Either single or double quotes may be used. There is no way of includ- + ing the delimiter within a substring. The hex and expand modifiers are + mutually exclusive. + + Specifying the pattern's length + + By default, patterns are passed to the compiling functions as zero-ter- + minated strings but can be passed by length instead of being zero-ter- + minated. The use_length modifier causes this to happen. Using a length + happens automatically (whether or not use_length is set) when hex is + set, because patterns specified in hexadecimal may contain binary ze- + ros. + + If hex or use_length is used with the POSIX wrapper API (see "Using the + POSIX wrapper API" below), the REG_PEND extension is used to pass the + pattern's length. + + Specifying a maximum for variable lookbehinds + + Variable lookbehind assertions are supported only if, for each one, + there is a maximum length (in characters) that it can match. There is a + limit on this, whose default can be set at build time, with an ultimate + default of 255. The max_varlookbehind modifier uses the + pcre2_set_max_varlookbehind() function to change the limit. Lookbehinds + whose branches each match a fixed length are limited to 65535 charac- + ters per branch. + + Specifying wide characters in 16-bit and 32-bit modes + + In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 + and translated to UTF-16 or UTF-32 when the utf modifier is set. For + testing the 16-bit and 32-bit libraries in non-UTF mode, the utf8_input + modifier can be used. It is mutually exclusive with utf. Input lines + are interpreted as UTF-8 as a means of specifying wide characters. More + details are given in "Input encoding" above. 
+ + Generating long repetitive patterns + + Some tests use long patterns that are very repetitive. Instead of cre- + ating a very long input line for such a pattern, you can use a special + repetition feature, similar to the one described for subject lines + above. If the expand modifier is present on a pattern, parts of the + pattern that have the form + + \[<characters>]{<count>} + + are expanded before the pattern is passed to pcre2_compile(). For exam- + ple, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction + cannot be nested. An initial "\[" sequence is recognized only if "]{" + followed by decimal digits and "}" is found later in the pattern. If + not, the characters remain in the pattern unaltered. The expand and hex + modifiers are mutually exclusive. + + If part of an expanded pattern looks like an expansion, but is really + part of the actual pattern, unwanted expansion can be avoided by giving + two values in the quantifier. For example, \[AB]{6000,6000} is not rec- + ognized as an expansion item. + + If the info modifier is set on an expanded pattern, the result of the + expansion is included in the information that is output. + + JIT compilation + + Just-in-time (JIT) compiling is a heavyweight optimization that can + greatly speed up pattern matching. See the pcre2jit documentation for + details. JIT compiling happens, optionally, after a pattern has been + successfully compiled into an internal form. The JIT compiler converts + this to optimized machine code. It needs to know whether the match-time + options PCRE2_PARTIAL_HARD and PCRE2_PARTIAL_SOFT are going to be used, + because different code is generated for the different cases. See the + partial modifier in "Subject Modifiers" below for details of how these + options are specified for each match attempt. + + JIT compilation is requested by the jit pattern modifier, which may op- + tionally be followed by an equals sign and a number in the range 0 to + 7. 
The three bits that make up the number specify which of the three + JIT operating modes are to be compiled: + + 1 compile JIT code for non-partial matching + 2 compile JIT code for soft partial matching + 4 compile JIT code for hard partial matching + + The possible values for the jit modifier are therefore: + + 0 disable JIT + 1 normal matching only + 2 soft partial matching only + 3 normal and soft partial matching + 4 hard partial matching only + 6 soft and hard partial matching only + 7 all three modes + + If no number is given, 7 is assumed. The phrase "partial matching" + means a call to pcre2_match() with either the PCRE2_PARTIAL_SOFT or the + PCRE2_PARTIAL_HARD option set. Note that such a call may return a com- + plete match; the options enable the possibility of a partial match, but + do not require it. Note also that if you request JIT compilation only + for partial matching (for example, jit=2) but do not set the partial + modifier on a subject line, that match will not use JIT code because + none was compiled for non-partial matching. + + If JIT compilation is successful, the compiled JIT code will automati- + cally be used when an appropriate type of match is run, except when in- + compatible run-time options are specified. For more details, see the + pcre2jit documentation. See also the jitstack modifier below for a way + of setting the size of the JIT stack. + + If the jitfast modifier is specified, matching is done using the JIT + "fast path" interface, pcre2_jit_match(), which skips some of the san- + ity checks that are done by pcre2_match(), and of course does not work + when JIT is not supported. If jitfast is specified without jit, jit=7 + is assumed. + + If the jitverify modifier is specified, information about the compiled + pattern shows whether JIT compilation was or was not successful. If + jitverify is specified without jit, jit=7 is assumed. 
If JIT compila- + tion is successful when jitverify is set, the text "(JIT)" is added to + the first output line after a match or non match when JIT-compiled code + was actually used in the match. + + Setting a locale + + The locale modifier must specify the name of a locale, for example: + + /pattern/locale=fr_FR + + The given locale is set, pcre2_maketables() is called to build a set of + character tables for the locale, and this is then passed to pcre2_com- + pile() when compiling the regular expression. The same tables are used + when matching the following subject lines. The locale modifier applies + only to the pattern on which it appears, but can be given in a #pattern + command if a default is needed. Setting a locale and alternate charac- + ter tables are mutually exclusive. + + Showing pattern memory + + The memory modifier causes the size in bytes of the memory used to hold + the compiled pattern to be output. This does not include the size of + the pcre2_code block; it is just the actual compiled data. If the pat- + tern is subsequently passed to the JIT compiler, the size of the JIT + compiled code is also output. Here is an example: + + re> /a(b)c/jit,memory + Memory allocation (code space): 21 + Memory allocation (JIT code): 1910 + + + Limiting nested parentheses + + The parens_nest_limit modifier sets a limit on the depth of nested + parentheses in a pattern. Breaching the limit causes a compilation er- + ror. The default for the library is set when PCRE2 is built, but + pcre2test sets its own default of 220, which is required for running + the standard test suite. + + Limiting the pattern length + + The max_pattern_length modifier sets a limit, in code units, to the + length of pattern that pcre2_compile() will accept. Breaching the limit + causes a compilation error. The default is the largest number a + PCRE2_SIZE variable can hold (essentially unlimited). 
+ + Limiting the size of a compiled pattern + + The max_pattern_compiled_length modifier sets a limit, in bytes, to the + amount of memory used by a compiled pattern. Breaching the limit causes + a compilation error. The default is the largest number a PCRE2_SIZE + variable can hold (essentially unlimited). + + Using the POSIX wrapper API + + The posix and posix_nosub modifiers cause pcre2test to call PCRE2 via + the POSIX wrapper API rather than its native API. When posix_nosub is + used, the POSIX option REG_NOSUB is passed to regcomp(). The POSIX + wrapper supports only the 8-bit library. Note that it does not imply + POSIX matching semantics; for more detail see the pcre2posix documenta- + tion. The following pattern modifiers set options for the regcomp() + function: + + caseless REG_ICASE + multiline REG_NEWLINE + dotall REG_DOTALL ) + ungreedy REG_UNGREEDY ) These options are not part of + ucp REG_UCP ) the POSIX standard + utf REG_UTF8 ) + + The regerror_buffsize modifier specifies a size for the error buffer + that is passed to regerror() in the event of a compilation error. For + example: + + /abc/posix,regerror_buffsize=20 + + This provides a means of testing the behaviour of regerror() when the + buffer is too small for the error message. If this modifier has not + been set, a large buffer is used. + + The aftertext and allaftertext subject modifiers work as described be- + low. All other modifiers are either ignored, with a warning message, or + cause an error. + + The pattern is passed to regcomp() as a zero-terminated string by de- + fault, but if the use_length or hex modifiers are set, the REG_PEND ex- + tension is used to pass it by length. + + Testing the stack guard feature + + The stackguard modifier is used to test the use of pcre2_set_com- + pile_recursion_guard(), a function that is provided to enable stack + availability to be checked during compilation (see the pcre2api docu- + mentation for details). 
If the number specified by the modifier is + greater than zero, pcre2_set_compile_recursion_guard() is called to set + up a callback from pcre2_compile() to a local function. The argument it + receives is the current nesting parenthesis depth; if this is greater + than the value given by the modifier, non-zero is returned, causing the + compilation to be aborted. + + Using alternative character tables + + The value specified for the tables modifier must be one of the digits + 0, 1, 2, or 3. It causes a specific set of built-in character tables to + be passed to pcre2_compile(). This is used in the PCRE2 tests to check + behaviour with different character tables. The digit specifies the ta- + bles as follows: + + 0 do not pass any special character tables + 1 the default ASCII tables, as distributed in + pcre2_chartables.c.dist + 2 a set of tables defining ISO 8859 characters + 3 a set of tables loaded by the #loadtables command + + In tables 2, some characters whose codes are greater than 128 are iden- + tified as letters, digits, spaces, etc. Tables 3 can be used only after + a #loadtables command has loaded them from a binary file. Setting al- + ternate character tables and a locale are mutually exclusive. + + Setting certain match controls + + The following modifiers are really subject modifiers, and are described + under "Subject Modifiers" below. However, they may be included in a + pattern's modifier list, in which case they are applied to every sub- + ject line that is processed with that pattern. These modifiers do not + affect the compilation process.
+ + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allvector show the entire ovector + allusedtext show all consulted text + altglobal alternative global matching + /g global global matching + heapframes_size show match data heapframes size + jitstack= set size of JIT stack + mark show mark values + replace= specify a replacement string + startchar show starting character when relevant + substitute_callout use substitution callouts + substitute_case_callout use substitution case callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip= skip substitution + substitute_stop= skip substitution and following + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + + These modifiers may not appear in a #pattern command. If you want them + as defaults, set them in a #subject command. + + Specifying literal subject lines + + If the subject_literal modifier is present on a pattern, all the sub- + ject lines that it matches are taken as literal strings, with no inter- + pretation of backslashes. It is not possible to set subject modifiers + on such lines, but any that are set as defaults by a #subject command + are recognized. + + Saving a compiled pattern + + When a pattern with the push modifier is successfully compiled, it is + pushed onto a stack of compiled patterns, and pcre2test expects the + next line to contain a new pattern (or a command) instead of a subject + line. This facility is used when saving compiled patterns to a file, as + described in the section entitled "Saving and restoring compiled pat- + terns" below. 
If pushcopy is used instead of push, a copy of the com- + piled pattern is stacked, leaving the original as current, ready to + match the following input lines. This provides a way of testing the + pcre2_code_copy() function. The push and pushcopy modifiers are in- + compatible with compilation modifiers such as global that act at match + time. Any that are specified are ignored (for the stacked copy), with a + warning message, except for replace, which causes an error. Note that + jitverify, which is allowed, does not carry through to any subsequent + matching that uses a stacked pattern. + + Testing foreign pattern conversion + + The experimental foreign pattern conversion functions in PCRE2 can be + tested by setting the convert modifier. Its argument is a colon-sepa- + rated list of options, which set the equivalent option for the + pcre2_pattern_convert() function: + + glob PCRE2_CONVERT_GLOB + glob_no_starstar PCRE2_CONVERT_GLOB_NO_STARSTAR + glob_no_wild_separator PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR + posix_basic PCRE2_CONVERT_POSIX_BASIC + posix_extended PCRE2_CONVERT_POSIX_EXTENDED + unset Unset all options + + The "unset" value is useful for turning off a default that has been set + by a #pattern command. When one of these options is set, the input pat- + tern is passed to pcre2_pattern_convert(). If the conversion is suc- + cessful, the result is reflected in the output and then passed to + pcre2_compile(). The normal utf and no_utf_check options, if set, cause + the PCRE2_CONVERT_UTF and PCRE2_CONVERT_NO_UTF_CHECK options to be + passed to pcre2_pattern_convert(). + + By default, the conversion function is allowed to allocate a buffer for + its output. However, if the convert_length modifier is set to a value + greater than zero, pcre2test passes a buffer of the given length. This + makes it possible to test the length check. 
+ + The convert_glob_escape and convert_glob_separator modifiers can be + used to specify the escape and separator characters for glob process- + ing, overriding the defaults, which are operating-system dependent. + + +SUBJECT MODIFIERS + + The modifiers that can appear in subject lines and the #subject command + are of two types. + + Setting match options + + The following modifiers set options for pcre2_match() or + pcre2_dfa_match(). See pcre2api for a description of their effects. + + anchored set PCRE2_ANCHORED + copy_matched_subject set PCRE2_COPY_MATCHED_SUBJECT + endanchored set PCRE2_ENDANCHORED + dfa_restart set PCRE2_DFA_RESTART + dfa_shortest set PCRE2_DFA_SHORTEST + disable_recurseloop_check set PCRE2_DISABLE_RECURSELOOP_CHECK + no_jit set PCRE2_NO_JIT + no_utf_check set PCRE2_NO_UTF_CHECK + notbol set PCRE2_NOTBOL + notempty set PCRE2_NOTEMPTY + notempty_atstart set PCRE2_NOTEMPTY_ATSTART + noteol set PCRE2_NOTEOL + partial_hard (or ph) set PCRE2_PARTIAL_HARD + partial_soft (or ps) set PCRE2_PARTIAL_SOFT + + The partial matching modifiers are provided with abbreviations because + they appear frequently in tests. + + If the posix or posix_nosub modifier was present on the pattern, caus- + ing the POSIX wrapper API to be used, the only option-setting modifiers + that have any effect are notbol, notempty, and noteol, causing REG_NOT- + BOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to + regexec(). The other modifiers are ignored, with a warning message. + + There is one additional modifier that can be used with the POSIX wrap- + per. It is ignored (with a warning) if used for non-POSIX matching. + + posix_startend=[:] + + This causes the subject string to be passed to regexec() using the + REG_STARTEND option, which uses offsets to specify which part of the + string is searched. If only one number is given, the end offset is + passed as the end of the subject string. For more detail of REG_STAR- + TEND, see the pcre2posix documentation. 
If the subject string contains + binary zeros (coded as escapes such as \x{00} because pcre2test does + not support actual binary zeros in its input), you must use posix_star- + tend to specify its length. + + Setting match controls + + The following modifiers affect the matching process or request addi- + tional information. Some of them may also be specified on a pattern + line (see above), in which case they apply to every subject line that + is matched against that pattern, but can be overridden by modifiers on + the subject. + + aftertext show text after match + allaftertext show text after captures + allcaptures show all captures + allusedtext show all consulted text (non-JIT only) + allvector show the entire ovector + altglobal alternative global matching + callout_capture show captures at callout time + callout_data= set a value to pass via callouts + callout_error=[:] control callout error + callout_extra show extra callout information + callout_fail=[:] control callout failure + callout_no_where do not show position of a callout + callout_none do not supply a callout function + copy= copy captured substring + depth_limit= set a depth limit + dfa use pcre2_dfa_match() + find_limits find heap, match and depth limits + find_limits_noheap find match and depth limits + get= extract captured substring + getall extract all captured substrings + /g global global matching + heapframes_size show match data heapframes size + heap_limit= set a limit on heap memory (Kbytes) + jitstack= set size of JIT stack + mark show mark values + match_limit= set a match limit + memory show heap memory usage + null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject + offset= set starting offset + offset_limit= set offset limit + ovector= set size of output vector + recursion_limit= obsolete synonym for depth_limit + replace= specify a replacement string + startchar show startchar when relevant + startoffset= 
same as offset= + substitute_callout use substitution callouts + substitute_case_callout use substitution case callouts + substitute_extended use PCRE2_SUBSTITUTE_EXTENDED + substitute_literal use PCRE2_SUBSTITUTE_LITERAL + substitute_matched use PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_skip= skip substitution number n + substitute_stop= skip substitution number n and greater + substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY + zero_terminate pass the subject as zero-terminated + + The effects of these modifiers are described in the following sections. + When matching via the POSIX wrapper API, the aftertext, allaftertext, + and ovector subject modifiers work as described below. All other modi- + fiers are either ignored, with a warning message, or cause an error. + + Showing more text + + The aftertext modifier requests that as well as outputting the part of + the subject string that matched the entire pattern, pcre2test should in + addition output the remainder of the subject string. This is useful for + tests where the subject contains multiple copies of the same substring. + The allaftertext modifier requests the same action for captured sub- + strings as well as the main matched substring. In each case the remain- + der is output on the following line with a plus character following the + capture number. + + The allusedtext modifier requests that all the text that was consulted + during a successful pattern match by the interpreter should be shown, + for both full and partial matches. This feature is not supported for + JIT matching, and if requested with JIT it is ignored (with a warning + message). 
Setting this modifier affects the output if there is a look- + behind at the start of a match, or, for a complete match, a lookahead + at the end, or if \K is used in the pattern. Characters that precede or + follow the start and end of the actual match are indicated in the out- + put by '<' or '>' characters underneath them. Here is an example: + + re> /(?<=pqr)abc(?=xyz)/ + data> 123pqrabcxyz456\=allusedtext + 0: pqrabcxyz + <<< >>> + data> 123pqrabcxy\=ph,allusedtext + Partial match: pqrabcxy + <<< + + The first, complete match shows that the matched string is "abc", with + the preceding and following strings "pqr" and "xyz" having been con- + sulted during the match (when processing the assertions). The partial + match can indicate only the preceding string. + + The startchar modifier requests that the starting character for the + match be indicated, if it is different to the start of the matched + string. The only time when this occurs is when \K has been processed as + part of the match. In this situation, the output for the matched string + is displayed from the starting character instead of from the match + point, with circumflex characters under the earlier characters. For ex- + ample: + + re> /abc\Kxyz/ + data> abcxyz\=startchar + 0: abcxyz + ^^^ + + Unlike allusedtext, the startchar modifier can be used with JIT. How- + ever, these two modifiers are mutually exclusive. + + Showing the value of all capture groups + + The allcaptures modifier requests that the values of all potential cap- + tured parentheses be output after a match. By default, only those up to + the highest one actually used in the match are output (corresponding to + the return code from pcre2_match()). Groups that did not take part in + the match are output as "<unset>". This modifier is not relevant for + DFA matching (which does no capturing) and does not apply when replace + is specified; it is ignored, with a warning message, if present.
+ + Showing the entire ovector, for all outcomes + + The allvector modifier requests that the entire ovector be shown, what- + ever the outcome of the match. Compare allcaptures, which shows only up + to the maximum number of capture groups for the pattern, and then only + for a successful complete non-DFA match. This modifier, which acts af- + ter any match result, and also for DFA matching, provides a means of + checking that there are no unexpected modifications to ovector fields. + Before each match attempt, the ovector is filled with a special value, + and if this is found in both elements of a capturing pair, "<unchanged>" is output. After a successful match, this applies to all + groups after the maximum capture group for the pattern. In other cases + it applies to the entire ovector. After a partial match, the first two + elements are the only ones that should be set. After a DFA match, the + amount of ovector that is used depends on the number of matches that + were found. + + Testing pattern callouts + + A callout function is supplied when pcre2test calls the library match- + ing functions, unless callout_none is specified. Its behaviour can be + controlled by various modifiers listed above whose names begin with + callout_. Details are given in the section entitled "Callouts" below. + Testing callouts from pcre2_substitute() is described separately in + "Testing the substitution function" below. + + Finding all matches in a string + + Searching for all possible matches within a subject can be requested by + the global or altglobal modifier. After finding a match, the matching + function is called again to search the remainder of the subject. The + difference between global and altglobal is that the former uses the + start_offset argument to pcre2_match() or pcre2_dfa_match() to start + searching at a new point within the entire string (which is what Perl + does), whereas the latter passes over a shortened subject.
This makes a + difference to the matching process if the pattern begins with a lookbe- + hind assertion (including \b or \B). + + If an empty string is matched, the next match is done with the + PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search + for another, non-empty, match at the same point in the subject. If this + match fails, the start offset is advanced, and the normal match is re- + tried. This imitates the way Perl handles such cases when using the /g + modifier or the split() function. Normally, the start offset is ad- + vanced by one character, but if the newline convention recognizes CRLF + as a newline, and the current character is CR followed by LF, an ad- + vance of two characters occurs. + + Testing substring extraction functions + + The copy and get modifiers can be used to test the pcre2_sub- + string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be + given more than once, and each can specify a capture group name or num- + ber, for example: + + abcd\=copy=1,copy=3,get=G1 + + If the #subject command is used to set default copy and/or get lists, + these can be unset by specifying a negative number to cancel all num- + bered groups and an empty name to cancel all named groups. + + The getall modifier tests pcre2_substring_list_get(), which extracts + all captured substrings. + + If the subject line is successfully matched, the substrings extracted + by the convenience functions are output with C, G, or L after the + string number instead of a colon. This is in addition to the normal + full list. The string length (that is, the return from the extraction + function) is given in parentheses after each substring, followed by the + name when the extraction was by name. + + Testing the substitution function + + If the replace modifier is set, the pcre2_substitute() function is + called instead of one of the matching functions (or after one call of + pcre2_match() in the case of PCRE2_SUBSTITUTE_MATCHED). 
Note that re- + placement strings cannot contain commas, because a comma signifies the + end of a modifier. This is not thought to be an issue in a test pro- + gram. + + Specifying a completely empty replacement string disables this modi- + fier. However, it is possible to specify an empty replacement by pro- + viding a buffer length, as described below, for an otherwise empty re- + placement. + + Unlike subject strings, pcre2test does not process replacement strings + for escape sequences. In UTF mode, a replacement string is checked to + see if it is a valid UTF-8 string. If so, it is correctly converted to + a UTF string of the appropriate code unit width. If it is not a valid + UTF-8 string, the individual code units are copied directly. This pro- + vides a means of passing an invalid UTF-8 string for testing purposes. + + The following modifiers set options (in addition to the normal match + options) for pcre2_substitute(): + + global PCRE2_SUBSTITUTE_GLOBAL + substitute_extended PCRE2_SUBSTITUTE_EXTENDED + substitute_literal PCRE2_SUBSTITUTE_LITERAL + substitute_matched PCRE2_SUBSTITUTE_MATCHED + substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH + substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY + substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET + substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY + + See the pcre2api documentation for details of these options. + + After a successful substitution, the modified string is output, pre- + ceded by the number of replacements. This may be zero if there were no + matches. Here is a simple example of a substitution test: + + /abc/replace=xxx + =abc=abc= + 1: =xxx=abc= + =abc=abc=\=global + 2: =xxx=xxx= + + Subject and replacement strings should be kept relatively short (fewer + than 256 characters) for substitution tests, as fixed-size buffers are + used.
To make it easy to test for buffer overflow, if the replacement + string starts with a number in square brackets, that number is passed + to pcre2_substitute() as the size of the output buffer, with the re- + placement string starting at the next character. Here is an example + that tests the edge case: + + /abc/ + 123abc123\=replace=[10]XYZ + 1: 123XYZ123 + 123abc123\=replace=[9]XYZ + Failed: error -47: no more memory + + The default action of pcre2_substitute() is to return PCRE2_ER- + ROR_NOMEMORY when the output buffer is too small. However, if the + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the substi- + tute_overflow_length modifier), pcre2_substitute() continues to go + through the motions of matching and substituting (but not doing any + callouts), in order to compute the size of buffer that is required. + When this happens, pcre2test shows the required buffer length (which + includes space for the trailing zero) as part of the error message. For + example: + + /abc/substitute_overflow_length + 123abc123\=replace=[9]XYZ + Failed: error -47: no more memory: 10 code units are needed + + A replacement string is ignored with POSIX and DFA matching. Specifying + partial matching provokes an error return ("bad option value") from + pcre2_substitute(). + + Testing substitute callouts + + If the substitute_callout modifier is set, a substitution callout func- + tion is set up. The null_context modifier must not be set, because the + address of the callout function is passed in a match context. When the + callout function is called (after each substitution), details of the + input and output strings are output. For example: + + /abc/g,replace=<$0>,substitute_callout + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc>" + 2(1) Old 6 9 "abc" New 8 13 "<abc>" + 2: <abc>def<abc>pqr + + The first number on each callout line is the count of matches.
The + parenthesized number is the number of pairs that are set in the ovector + (that is, one more than the number of capturing groups that were set). + Then are listed the offsets of the old substring, its contents, and the + same for the replacement. + + By default, the substitution callout function returns zero, which ac- + cepts the replacement and causes matching to continue if /g was used. + Two further modifiers can be used to test other return values. If sub- + stitute_skip is set to a value greater than zero the callout function + returns +1 for the match of that number, and similarly substitute_stop + returns -1. These cause the replacement to be rejected, and -1 causes + no further matching to take place. If either of them is set, substi- + tute_callout is assumed. For example: + + /abc/g,replace=<$0>,substitute_skip=1 + abcdefabcpqr + 1(1) Old 0 3 "abc" New 0 5 "<abc> SKIPPED" + 2(1) Old 6 9 "abc" New 6 11 "<abc>" + 2: abcdef<abc>pqr + abcdefabcpqr\=substitute_stop=1 + 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED" + 1: abcdefabcpqr + + If both are set for the same number, stop takes precedence. Only a sin- + gle skip or stop is supported, which is sufficient for testing that the + feature works. + + Testing substitute case callouts + + If the substitute_case_callout modifier is set, a substitution case + callout function is set up. The callout function is called for each + substituted chunk which is to be case-transformed. + + The callout function passed is a fixed function with implementation for + certain behaviours: inputs which shrink when case-transformed; inputs + which grow; inputs with distinct upper/lower/titlecase forms. The char- + acters which are not special-cased for testing purposes are left unmod- + ified, as if they are caseless characters. + + Setting the JIT stack size + + The jitstack modifier provides a way of setting the maximum stack size + that is used by the just-in-time optimization code. It is ignored if + JIT optimization is not being used.
The value is a number of kibibytes + (units of 1024 bytes). Setting zero reverts to the default of 32KiB. + Providing a stack that is larger than the default is necessary only for + very complicated patterns. If jitstack is set non-zero on a subject + line it overrides any value that was set on the pattern. + + Setting heap, match, and depth limits + + The heap_limit, match_limit, and depth_limit modifiers set the appro- + priate limits in the match context. These values are ignored when the + find_limits or find_limits_noheap modifier is specified. + + Finding minimum limits + + If the find_limits modifier is present on a subject line, pcre2test + calls the relevant matching function several times, setting different + values in the match context via pcre2_set_heap_limit(), + pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the + smallest value for each parameter that allows the match to complete + without a "limit exceeded" error. The match itself may succeed or fail. + An alternative modifier, find_limits_noheap, omits the heap limit. This + is used in the standard tests, because the minimum heap limit varies + between systems. If JIT is being used, only the match limit is rele- + vant, and the other two are automatically omitted. + + When using this modifier, the pattern should not contain any limit set- + tings such as (*LIMIT_MATCH=...) within it. If such a setting is + present and is lower than the minimum matching value, the minimum value + cannot be found because pcre2_set_match_limit() etc. are only able to + reduce the value of an in-pattern limit; they cannot increase it. + + For non-DFA matching, the minimum depth_limit number is a measure of + how much nested backtracking happens (that is, how deeply the pattern's + tree is searched). In the case of DFA matching, depth_limit controls + the depth of recursive calls of the internal function that is used for + handling pattern recursion, lookaround assertions, and atomic groups. 
+ + For non-DFA matching, the match_limit number is a measure of the amount + of backtracking that takes place, and learning the minimum value can be + instructive. For most simple matches, the number is quite small, but + for patterns with very large numbers of matching possibilities, it can + become large very quickly with increasing length of subject string. In + the case of DFA matching, match_limit controls the total number of + calls, both recursive and non-recursive, to the internal matching func- + tion, thus controlling the overall amount of computing resource that is + used. + + For both kinds of matching, the heap_limit number, which is in + kibibytes (units of 1024 bytes), limits the amount of heap memory used + for matching. + + Showing MARK names + + + The mark modifier causes the names from backtracking control verbs that + are returned from calls to pcre2_match() to be displayed. If a mark is + returned for a match, non-match, or partial match, pcre2test shows it. + For a match, it is on a line by itself, tagged with "MK:". Otherwise, + it is added to the non-match message. + + Showing memory usage + + The memory modifier causes pcre2test to log the sizes of all heap mem- + ory allocation and freeing calls that occur during a call to + pcre2_match() or pcre2_dfa_match(). In the latter case, heap memory is + used only when a match requires more internal workspace than the de- + fault allocation on the stack, so in many cases there will be no out- + put. No heap memory is allocated during matching with JIT. For this + modifier to work, the null_context modifier must not be set on both the + pattern and the subject, though it can be set on one or the other. + + Showing the heap frame overall vector size + + The heapframes_size modifier is relevant for matches using + pcre2_match() without JIT.
After a match has run (whether successful or + not) the size, in bytes, of the allocated heap frames vector that is + left attached to the match data block is shown. If the matching action + involved several calls to pcre2_match() (for example, global matching + or for timing) only the final value is shown. + + This modifier is ignored, with a warning, for POSIX or DFA matching. + JIT matching does not use the heap frames vector, so the size is always + zero, unless there was a previous non-JIT match. Note that specifying a + size of zero for the output vector (see below) causes pcre2test to free + its match data block (and associated heap frames vector) and allocate a + new one. + + Setting a starting offset + + The offset modifier sets an offset in the subject string at which + matching starts. Its value is a number of code units, not characters. + + Setting an offset limit + + The offset_limit modifier sets a limit for unanchored matches. If a + match cannot be found starting at or before this offset in the subject, + a "no match" return is given. The data value is a number of code units, + not characters. When this modifier is used, the use_offset_limit modi- + fier must have been set for the pattern; if not, an error is generated. + + Setting the size of the output vector + + The ovector modifier applies only to the subject line in which it ap- + pears, though of course it can also be used to set a default in a #sub- + ject command. It specifies the number of pairs of offsets that are + available for storing matching information. The default is 15. + + A value of zero is useful when testing the POSIX API because it causes + regexec() to be called with a NULL capture vector. When not testing the + POSIX API, a value of zero is used to cause pcre2_match_data_cre- + ate_from_pattern() to be called, in order to create a new match block + of exactly the right size for the pattern.
(It is not possible to cre- + ate a match block with a zero-length ovector; there is always at least + one pair of offsets.) The old match data block is freed. + + Passing the subject as zero-terminated + + By default, the subject string is passed to a native API matching func- + tion with its correct length. In order to test the facility for passing + a zero-terminated string, the zero_terminate modifier is provided. It + causes the length to be passed as PCRE2_ZERO_TERMINATED. When matching + via the POSIX interface, this modifier is ignored, with a warning. + + When testing pcre2_substitute(), this modifier also has the effect of + passing the replacement string as zero-terminated. + + Passing a NULL context, subject, or replacement + + Normally, pcre2test passes a context block to pcre2_match(), + pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the + null_context modifier is set, however, NULL is passed. This is for + testing that the matching and substitution functions behave correctly + in this case (they use default values). This modifier cannot be used + with the find_limits, find_limits_noheap, or substitute_callout modi- + fiers. + + Similarly, for testing purposes, if the null_subject or null_replace- + ment modifier is set, the subject or replacement string pointers are + passed as NULL, respectively, to the relevant functions. + + +THE ALTERNATIVE MATCHING FUNCTION + + By default, pcre2test uses the standard PCRE2 matching function, + pcre2_match() to match each subject line. PCRE2 also supports an alter- + native matching function, pcre2_dfa_match(), which operates in a dif- + ferent way, and has some restrictions. The differences between the two + functions are described in the pcre2matching documentation. + + If the dfa modifier is set, the alternative matching function is used. + This function finds all possible matches at a given point in the sub- + ject. 
If, however, the dfa_shortest modifier is set, processing stops + after the first match is found. This is always the shortest possible + match. + + +DEFAULT OUTPUT FROM pcre2test + + This section describes the output when the normal matching function, + pcre2_match(), is being used. + + When a match succeeds, pcre2test outputs the list of captured sub- + strings, starting with number 0 for the string that matched the whole + pattern. Otherwise, it outputs "No match" when the return is PCRE2_ER- + ROR_NOMATCH, or "Partial match:" followed by the partially matching + substring when the return is PCRE2_ERROR_PARTIAL. (Note that this is + the entire substring that was inspected during the partial match; it + may include characters before the actual match start if a lookbehind + assertion, \K, \b, or \B was involved.) + + For any other return, pcre2test outputs the PCRE2 negative error number + and a short descriptive phrase. If the error is a failed UTF string + check, the code unit offset of the start of the failing character is + also output. Here is an example of an interactive pcre2test run. + + $ pcre2test + PCRE2 version 10.22 2016-07-29 + + re> /^abc(\d+)/ + data> abc123 + 0: abc123 + 1: 123 + data> xyz + No match + + Unset capturing substrings that are not followed by one that is set are + not shown by pcre2test unless the allcaptures modifier is specified. In + the following example, there are two capturing substrings, but when the + first data line is matched, the second, unset substring is not shown. + An "internal" unset substring is shown as "<unset>", as for the second + data line. + + re> /(a)|(b)/ + data> a + 0: a + 1: a + data> b + 0: b + 1: <unset> + 2: b + + If the strings contain any non-printing characters, they are output as + \xhh escapes if the value is less than 256 and UTF mode is not set. + Otherwise they are output as \x{hh...} escapes. See below for the defi- + nition of non-printing characters. 
If the aftertext modifier is set, + the output for substring 0 is followed by the rest of the subject + string, identified by "0+" like this: + + re> /cat/aftertext + data> cataract + 0: cat + 0+ aract + + If global matching is requested, the results of successive matching at- + tempts are output in sequence, like this: + + re> /\Bi(\w\w)/g + data> Mississippi + 0: iss + 1: ss + 0: iss + 1: ss + 0: ipp + 1: pp + + "No match" is output only if the first match attempt fails. Here is an + example of a failure message (the offset 4 that is specified by the + offset modifier is past the end of the subject string): + + re> /xyz/ + data> xyz\=offset=4 + Error -24 (bad offset value) + + Note that whereas patterns can be continued over several lines (a plain + ">" prompt is used for continuations), subject lines may not. However + newlines can be included in a subject by means of the \n escape (or \r, + \r\n, etc., depending on the newline sequence setting). + + +OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION + + When the alternative matching function, pcre2_dfa_match(), is used, the + output consists of a list of all the matches that start at the first + point in the subject where there is at least one match. For example: + + re> /(tang|tangerine|tan)/ + data> yellow tangerine\=dfa + 0: tangerine + 1: tang + 2: tan + + Using the normal matching function on this data finds only "tang". The + longest matching string is always given first (and numbered zero). Af- + ter a PCRE2_ERROR_PARTIAL return, the output is "Partial match:", fol- + lowed by the partially matching substring. Note that this is the entire + substring that was inspected during the partial match; it may include + characters before the actual match start if a lookbehind assertion, \b, + or \B was involved. (\K is not supported for DFA matching.) + + If global matching is requested, the search for further matches resumes + at the end of the longest match. 
For example: + + re> /(tang|tangerine|tan)/g + data> yellow tangerine and tangy sultana\=dfa + 0: tangerine + 1: tang + 2: tan + 0: tang + 1: tan + 0: tan + + The alternative matching function does not support substring capture, + so the modifiers that are concerned with captured substrings are not + relevant. + + +RESTARTING AFTER A PARTIAL MATCH + + When the alternative matching function has given the PCRE2_ERROR_PAR- + TIAL return, indicating that the subject partially matched the pattern, + you can restart the match with additional subject data by means of the + dfa_restart modifier. For example: + + re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ + data> 23ja\=ps,dfa + Partial match: 23ja + data> n05\=dfa,dfa_restart + 0: n05 + + For further information about partial matching, see the pcre2partial + documentation. + + +CALLOUTS + + If the pattern contains any callout requests, pcre2test's callout func- + tion is called during matching unless callout_none is specified. This + works with both matching functions, and with JIT, though there are some + differences in behaviour. The output for callouts with numerical argu- + ments and those with string arguments is slightly different. + + Callouts with numerical arguments + + By default, the callout function displays the callout number, the start + and current positions in the subject text at the callout time, and the + next pattern item to be tested. For example: + + --->pqrabcdef + 0 ^ ^ \d + + This output indicates that callout number 0 occurred for a match at- + tempt starting at the fourth character of the subject string, when the + pointer was at the seventh character, and when the next pattern item + was \d. Just one circumflex is output if the start and current posi- + tions are the same, or if the current position precedes the start posi- + tion, which can happen if the callout is in a lookbehind assertion. 
+ + Callouts numbered 255 are assumed to be automatic callouts, inserted as + a result of the auto_callout pattern modifier. In this case, instead of + showing the callout number, the offset in the pattern, preceded by a + plus, is output. For example: + + re> /\d?[A-E]\*/auto_callout + data> E* + --->E* + +0 ^ \d? + +3 ^ [A-E] + +8 ^^ \* + +10 ^ ^ + 0: E* + + If a pattern contains (*MARK) items, an additional line is output when- + ever a change of latest mark is passed to the callout function. For ex- + ample: + + re> /a(*MARK:X)bc/auto_callout + data> abc + --->abc + +0 ^ a + +1 ^^ (*MARK:X) + +10 ^^ b + Latest Mark: X + +11 ^ ^ c + +12 ^ ^ + 0: abc + + The mark changes between matching "a" and "b", but stays the same for + the rest of the match, so nothing more is output. If, as a result of + backtracking, the mark reverts to being unset, the text "<unset>" is + output. + + Callouts with string arguments + + The output for a callout with a string argument is similar, except that + instead of outputting a callout number before the position indicators, + the callout string and its offset in the pattern string are output be- + fore the reflection of the subject string, and the subject string is + reflected for each callout. For example: + + re> /^ab(?C'first')cd(?C"second")ef/ + data> abcdefg + Callout (7): 'first' + --->abcdefg + ^ ^ c + Callout (20): "second" + --->abcdefg + ^ ^ e + 0: abcdef + + + Callout modifiers + + The callout function in pcre2test returns zero (carry on matching) by + default, but you can use a callout_fail modifier in a subject line to + change this and other parameters of the callout (see below). + + If the callout_capture modifier is set, the current captured groups are + output when a callout occurs. This is useful only for non-DFA matching, + as pcre2_dfa_match() does not support capturing, so no captures are + ever shown. 
+ + The normal callout output, showing the callout number or pattern offset + (as described above) is suppressed if the callout_no_where modifier is + set. + + When using the interpretive matching function pcre2_match() without + JIT, setting the callout_extra modifier causes additional output from + pcre2test's callout function to be generated. For the first callout in + a match attempt at a new starting position in the subject, "New match + attempt" is output. If there has been a backtrack since the last call- + out (or start of matching if this is the first callout), "Backtrack" is + output, followed by "No other matching paths" if the backtrack ended + the previous match attempt. For example: + + re> /(a+)b/auto_callout,no_start_optimize,no_auto_possess + data> aac\=callout_extra + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^ ^ ) + +4 ^ ^ b + Backtrack + --->aac + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + +3 ^^ ) + +4 ^^ b + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + Backtrack + No other matching paths + New match attempt + --->aac + +0 ^ ( + +1 ^ a+ + No match + + Notice that various optimizations must be turned off if you want all + possible matching paths to be scanned. If no_start_optimize is not + used, there is an immediate "no match", without any callouts, because + the starting optimization fails to find "b" in the subject, which it + knows must be present for any match. If no_auto_possess is not used, + the "a+" item is turned into "a++", which reduces the number of back- + tracks. + + The callout_extra modifier has no effect if used with the DFA matching + function, or with JIT. + + Return values from callouts + + The default return from the callout function is zero, which allows + matching to continue. The callout_fail modifier can be given one or two + numbers. 
If there is only one number, 1 is returned instead of 0 (caus- + ing matching to backtrack) when a callout of that number is reached. If + two numbers (<n>:<m>) are given, 1 is returned when callout <n> is + reached and there have been at least <m> callouts. The callout_error + modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus- + ing the entire matching process to be aborted. If both these modifiers + are set for the same callout number, callout_error takes precedence. + Note that callouts with string arguments are always given the number + zero. + + The callout_data modifier can be given an unsigned or a negative num- + ber. This is set as the "user data" that is passed to the matching + function, and passed back when the callout function is invoked. Any + value other than zero is used as a return from pcre2test's callout + function. + + Inserting callouts can be helpful when using pcre2test to check compli- + cated regular expressions. For further information about callouts, see + the pcre2callout documentation. + + +NON-PRINTING CHARACTERS + + When pcre2test is outputting text in the compiled version of a pattern, + bytes other than 32-126 are always treated as non-printing characters + and are therefore shown as hex escapes. + + When pcre2test is outputting text that is a matched part of a subject + string, it behaves in the same way, unless a different locale has been + set for the pattern (using the locale modifier). In this case, the is- + print() function is used to distinguish printing and non-printing char- + acters. + + +SAVING AND RESTORING COMPILED PATTERNS + + It is possible to save compiled patterns on disc or elsewhere, and re- + load them later, subject to a number of restrictions. JIT data cannot + be saved. The host on which the patterns are reloaded must be running + the same version of PCRE2, with the same code unit width, and must also + have the same endianness, pointer width and PCRE2_SIZE type. 
Before + compiled patterns can be saved they must be serialized, that is, con- + verted to a stream of bytes. A single byte stream may contain any num- + ber of compiled patterns, but they must all use the same character ta- + bles. A single copy of the tables is included in the byte stream (its + size is 1088 bytes). + + The functions whose names begin with pcre2_serialize_ are used for se- + rializing and de-serializing. They are described in the pcre2serialize + documentation. In this section we describe the features of pcre2test + that can be used to test these functions. + + Note that "serialization" in PCRE2 does not convert compiled patterns + to an abstract format like Java or .NET. It just makes a reloadable + byte code stream. Hence the restrictions on reloading mentioned above. + + In pcre2test, when a pattern with push modifier is successfully com- + piled, it is pushed onto a stack of compiled patterns, and pcre2test + expects the next line to contain a new pattern (or command) instead of + a subject line. By contrast, the pushcopy modifier causes a copy of the + compiled pattern to be stacked, leaving the original available for im- + mediate matching. By using push and/or pushcopy, a number of patterns + can be compiled and retained. These modifiers are incompatible with + posix, and control modifiers that act at match time are ignored (with a + message) for the stacked patterns. The jitverify modifier applies only + at compile time. + + The command + + #save <filename> + + causes all the stacked patterns to be serialized and the result written + to the named file. Afterwards, all the stacked patterns are freed. The + command + + #load <filename> + + reads the data in the file, and then arranges for it to be de-serial- + ized, with the resulting compiled patterns added to the pattern stack. 
+ The pattern on the top of the stack can be retrieved by the #pop com- + mand, which must be followed by lines of subjects that are to be + matched with the pattern, terminated as usual by an empty line or end + of file. This command may be followed by a modifier list containing + only control modifiers that act after a pattern has been compiled. In + particular, hex, posix, posix_nosub, push, and pushcopy are not al- + lowed, nor are any option-setting modifiers. The JIT modifiers are, + however, permitted. Here is an example that saves and reloads two pat- + terns. + + /abc/push + /xyz/push + #save tempfile + #load tempfile + #pop info + xyz + + #pop jit,bincode + abc + + If jitverify is used with #pop, it does not automatically imply jit, + which is different behaviour from when it is used on a pattern. + + The #popcopy command is analogous to the pushcopy modifier in that it + makes current a copy of the topmost stack pattern, leaving the original + still on the stack. + + +SEE ALSO + + pcre2(3), pcre2api(3), pcre2callout(3), pcre2jit(3), pcre2matching(3), + pcre2partial(3), pcre2pattern(3), pcre2serialize(3). + + +AUTHOR + + Philip Hazel + Retired from University Computing Service + Cambridge, England. + + +REVISION + + Last updated: 26 December 2024 + Copyright (c) 1997-2024 University of Cambridge. + + +PCRE2 10.45 26 December 2024 PCRE2TEST(1) diff --git a/3rd/pcre2/doc/pcre2unicode.3 b/3rd/pcre2/doc/pcre2unicode.3 new file mode 100644 index 00000000..44759ec7 --- /dev/null +++ b/3rd/pcre2/doc/pcre2unicode.3 @@ -0,0 +1,514 @@ +.TH PCRE2UNICODE 3 "27 November 2024" "PCRE2 10.45" +.SH NAME +PCRE2 - Perl-compatible regular expressions (revised API) +.SH "UNICODE AND UTF SUPPORT" +.rs +.sp +PCRE2 is normally built with Unicode support, though if you do not need it, you +can build it without, in which case the library will be smaller. 
With Unicode +support, PCRE2 has knowledge of Unicode character properties and can process +strings of text in UTF-8, UTF-16, and UTF-32 format (depending on the code unit +width), but this is not the default. Unless specifically requested, PCRE2 +treats each code unit in a string as one character. +.P +There are two ways of telling PCRE2 to switch to UTF mode, where characters may +consist of more than one code unit and the range of values is constrained. The +program can call +.\" HREF +\fBpcre2_compile()\fP +.\" +with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF). +However, the latter facility can be locked out by the PCRE2_NEVER_UTF option. +That is, the programmer can prevent the supplier of the pattern from switching +to UTF mode. +.P +Note that the PCRE2_MATCH_INVALID_UTF option (see +.\" HTML +.\" +below) +.\" +forces PCRE2_UTF to be set. +.P +In UTF mode, both the pattern and any subject strings that are matched against +it are treated as UTF strings instead of strings of individual one-code-unit +characters. There are also some other changes to the way characters are +handled, as documented below. +. +. +.SH "UNICODE PROPERTY SUPPORT" +.rs +.sp +When PCRE2 is built with Unicode support, the escape sequences \ep{..}, +\eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting. +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the derived properties +Any and Lc (synonym L&), the Unicode script names such as Arabic or Han, +Bidi_Class, Bidi_Control, and a few binary properties. +.P +The full lists are given in the +.\" HREF +\fBpcre2pattern\fP +.\" +and +.\" HREF +\fBpcre2syntax\fP +.\" +documentation. In general, only the short names for properties are supported. +For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not +supported. 
Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. +. +. +.SH "WIDE CHARACTERS AND UTF MODES" +.rs +.sp +Code points less than 256 can be specified in patterns by either braced or +unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger +values have to use braced sequences. Unbraced octal code points up to \e777 are +also recognized; larger ones can be coded using \eo{...}. +.P +The escape sequence \eN{U+<hex digits>} is recognized as another way of +specifying a Unicode character by code point in a UTF mode. It is not allowed +in non-UTF mode. +.P +In UTF mode, repeat quantifiers apply to complete UTF characters, not to +individual code units. +.P +In UTF mode, the dot metacharacter matches one UTF character instead of a +single code unit. +.P +In UTF mode, capture group names are not restricted to ASCII, and may contain +any Unicode letters and decimal digits, as well as underscore. +.P +The escape sequence \eC can be used to match a single code unit in UTF mode, +but its use can lead to some strange effects because it breaks up multi-unit +characters (see the description of \eC in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation). For this reason, there is a build-time option that disables +support for \eC completely. There is also a less draconian compile-time option +for locking out the use of \eC when a pattern is compiled. +.P +The use of \eC is not supported by the alternative matching function +\fBpcre2_dfa_match()\fP when in UTF-8 or UTF-16 mode, that is, when a character +may consist of more than one code unit. The use of \eC in these modes provokes +a match-time error. Also, the JIT optimization does not support \eC in these +modes. If JIT optimization is requested for a UTF-8 or UTF-16 pattern that +contains \eC, it will not succeed, and so when \fBpcre2_match()\fP is called, +the matching will be carried out by the interpretive function. 
+.P +The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly test +characters of any code value, but, by default, the characters that PCRE2 +recognizes as digits, spaces, or word characters remain the same set as in +non-UTF mode, all with code points less than 256. This remains true even when +PCRE2 is built to include Unicode support, because to do otherwise would slow +down matching in many common cases. Note that this also applies to \eb +and \eB, because they are defined in terms of \ew and \eW. If you want +to test for a wider sense of, say, "digit", you can use explicit Unicode +property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option, +the way that the character escapes work is changed so that Unicode properties +are used to determine which characters match, though there are some options +that suppress this for individual escapes. For details see the section on +.\" HTML +.\" +generic character types +.\" +in the +.\" HREF +\fBpcre2pattern\fP +.\" +documentation. +.P +Like the escapes, characters that match the POSIX named character classes are +all low-valued characters unless the PCRE2_UCP option is set, but there is an +option to override this. +.P +In contrast to the character escapes and character classes, the special +horizontal and vertical white space escapes (\eh, \eH, \ev, and \eV) do match +all the appropriate Unicode characters, whether or not PCRE2_UCP is set. +. +. +.SH "UNICODE CASE-EQUIVALENCE" +.rs +.sp +If either PCRE2_UTF or PCRE2_UCP is set, upper/lower case processing makes use +of Unicode properties except for characters whose code points are less than 128 +and that have at most two case-equivalent values. For these, a direct table +lookup is used for speed. A few Unicode characters such as Greek sigma have +more than two code points that are case-equivalent, and these are treated +specially. 
Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case +processing for non-UTF character encodings such as UCS-2. +.P +There are two ASCII characters (S and K) that, in addition to their ASCII lower +case equivalents, have a non-ASCII one as well (long S and Kelvin sign). +Recognition of these non-ASCII characters as case-equivalent to their ASCII +counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT +option. When this is set, all characters in a case equivalence must either be +ASCII or non-ASCII; there can be no mixing. +.sp + Without PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' = U+212A (Kelvin sign) + 's' = 'S' = U+017F (long S) + With PCRE2_EXTRA_CASELESS_RESTRICT: + 'k' = 'K' + U+212A (Kelvin sign) only case-equivalent to itself + 's' = 'S' + U+017F (long S) only case-equivalent to itself +.P +One language family, Turkish and Azeri, has its own case-insensitivity rules, +which can be selected by setting PCRE2_EXTRA_TURKISH_CASING. This alters the +behaviour of the 'i', 'I', U+0130 (capital I with dot above), and U+0131 +(small dotless i) characters. +.sp + Without PCRE2_EXTRA_TURKISH_CASING: + 'i' = 'I' + U+0130 (capital I with dot above) only case-equivalent to itself + U+0131 (small dotless i) only case-equivalent to itself + With PCRE2_EXTRA_TURKISH_CASING: + 'i' = U+0130 (capital I with dot above) + U+0131 (small dotless i) = 'I' +.P +It is not allowed to specify both PCRE2_EXTRA_CASELESS_RESTRICT and +PCRE2_EXTRA_TURKISH_CASING together. +.P +From release 10.45 the Unicode letter properties Lu (upper case), Ll (lower +case), and Lt (title case) are all treated as Lc (cased letter) when caseless +matching is set by the PCRE2_CASELESS option or (?i) within the pattern. +. +. +.\" HTML +.SH "SCRIPT RUNS" +.rs +.sp +The pattern constructs (*script_run:...) and (*atomic_script_run:...), with +synonyms (*sr:...) and (*asr:...), verify that the string matched within the +parentheses is a script run. 
In concept, a script run is a sequence of +characters that are all from the same Unicode script. However, because some +scripts are commonly used together, and because some diacritical and other +marks are used with multiple scripts, it is not that simple. +.P +Every Unicode character has a Script property, mostly with a value +corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There +are also three special values: +.P +"Unknown" is used for code points that have not been assigned, and also for the +surrogate code points. In the PCRE2 32-bit library, characters whose code +points are greater than the Unicode maximum (U+10FFFF), which are accessible +only in non-UTF mode, are assigned the Unknown script. +.P +"Common" is used for characters that are used with many scripts. These include +punctuation, emoji, mathematical, musical, and currency symbols, and the ASCII +digits 0 to 9. +.P +"Inherited" is used for characters such as diacritical marks that modify a +previous character. These are considered to take on the script of the character +that they modify. +.P +Some Inherited characters are used with many scripts, but many of them are only +normally used with a small number of scripts. For example, U+102E0 (Coptic +Epact thousands mark) is used only with Arabic and Coptic. In order to make it +possible to check this, a Unicode property called Script Extension exists. Its +value is a list of scripts that apply to the character. For the majority of +characters, the list contains just one script, the same one as the Script +property. However, for characters such as U+102E0 more than one Script is +listed. There are also some Common characters that have a single, non-Common +script in their Script Extension list. +.P +The next section describes the basic rules for deciding whether a given string +of characters is a script run. 
Note, however, that there are some special cases +involving the Chinese Han script, and an additional constraint for decimal +digits. These are covered in subsequent sections. +. +. +.SS "Basic script run rules" +.rs +.sp +A string that is less than two characters long is a script run. This is the +only case in which an Unknown character can be part of a script run. Longer +strings are checked using only the Script Extensions property, not the basic +Script property. +.P +If a character's Script Extension property is the single value "Inherited", it +is always accepted as part of a script run. This is also true for the property +"Common", subject to the checking of decimal digits described below. All the +remaining characters in a script run must have at least one script in common in +their Script Extension lists. In set-theoretic terminology, the intersection of +all the sets of scripts must not be empty. +.P +A simple example is an Internet name such as "google.com". The letters are all +in the Latin script, and the dot is Common, so this string is a script run. +However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a +string that looks the same, but with Cyrillic "o"s is not a script run. +.P +More interesting examples involve characters with more than one script in their +Script Extension. Consider the following characters: +.sp + U+060C Arabic comma + U+06D4 Arabic full stop +.sp +The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and +Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could +appear in script runs of either Arabic or Hanifi Rohingya. The first could also +appear in Syriac or Thaana script runs, but the second could not. +. +. +.SS "The Chinese Han script" +.rs +.sp +The Chinese Han script is commonly used in conjunction with other scripts for +writing certain languages. 
Japanese uses the Hiragana and Katakana scripts +together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo +and Han. These three combinations are treated as special cases when checking +script runs and are, in effect, "virtual scripts". Thus, a script run may +contain a mixture of Hiragana, Katakana, and Han, or a mixture of Hangul and +Han, or a mixture of Bopomofo and Han, but not, for example, a mixture of +Hangul and Bopomofo and Han. PCRE2 (like Perl) follows Unicode's Technical +Standard 39 ("Unicode Security Mechanisms", http://unicode.org/reports/tr39/) +in allowing such mixtures. +. +. +.SS "Decimal digits" +.rs +.sp +Unicode contains many sets of 10 decimal digits in different scripts, and some +scripts (including the Common script) contain more than one set. Some of these +decimal digits are visually indistinguishable from the common ASCII +digits. In addition to the script checking described above, if a script run +contains any decimal digits, they must all come from the same set of 10 +adjacent characters. +. +. +.SH "VALIDITY OF UTF STRINGS" +.rs +.sp +When the PCRE2_UTF option is set, the strings passed as patterns and subjects +are (by default) checked for validity on entry to the relevant functions. If an +invalid UTF string is passed, a negative error code is returned. The code unit +offset to the offending character can be extracted from the match data block by +calling \fBpcre2_get_startchar()\fP, which is used for this purpose after a UTF +error. +.P +In some situations, you may already know that your strings are valid, and +therefore want to skip these checks in order to improve performance, for +example in the case of a long subject string that is being scanned repeatedly. +If you set the PCRE2_NO_UTF_CHECK option at compile time or at match time, +PCRE2 assumes that the pattern or subject it is given (respectively) contains +only valid UTF code unit sequences. 
+.P +If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result +is undefined and your program may crash or loop indefinitely or give incorrect +results. There is, however, one mode of matching that can handle invalid UTF +subject strings. This is enabled by passing PCRE2_MATCH_INVALID_UTF to +\fBpcre2_compile()\fP and is discussed below in the next section. The rest of +this section covers the case when PCRE2_MATCH_INVALID_UTF is not set. +.P +Passing PCRE2_NO_UTF_CHECK to \fBpcre2_compile()\fP just disables the UTF check +for the pattern; it does not also apply to subject strings. If you want to +disable the check for a subject string you must pass this same option to +\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. +.P +UTF-16 and UTF-32 strings can indicate their endianness by a special code known +as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting +strings to be in host byte order. +.P +Unless PCRE2_NO_UTF_CHECK is set, a UTF string is checked before any other +processing takes place. In the case of \fBpcre2_match()\fP and +\fBpcre2_dfa_match()\fP calls with a non-zero starting offset, the check is +applied only to that part of the subject that could be inspected during +matching, and there is a check that the starting offset points to the first +code unit of a character or to the end of the subject. If there are no +lookbehind assertions in the pattern, the check starts at the starting offset. +Otherwise, it starts at the length of the longest lookbehind before the +starting offset, or at the start of the subject if there are not that many +characters before the starting offset. Note that the sequences \eb and \eB are +one-character lookbehinds. +.P +In addition to checking the format of the string, there is a check to ensure +that all code points lie in the range U+0 to U+10FFFF, excluding the surrogate +area. 
The so-called "non-character" code points are not excluded because +Unicode corrigendum #9 makes it clear that they should not be. +.P +Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16, +where they are used in pairs to encode code points with values greater than +0xFFFF. The code points that are encoded by UTF-16 pairs are available +independently in the UTF-8 and UTF-32 encodings. (In other words, the whole +surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and +UTF-32.) +.P +Setting PCRE2_NO_UTF_CHECK at compile time does not disable the error that is +given if an escape sequence for an invalid Unicode code point is encountered in +the pattern. If you want to allow escape sequences such as \ex{d800} (a +surrogate code point) you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra +option. However, this is possible only in UTF-8 and UTF-32 modes, because these +values are not representable in UTF-16. +. +. +.\" HTML +.SS "Errors in UTF-8 strings" +.rs +.sp +The following negative error codes are given for invalid UTF-8 strings: +.sp + PCRE2_ERROR_UTF8_ERR1 + PCRE2_ERROR_UTF8_ERR2 + PCRE2_ERROR_UTF8_ERR3 + PCRE2_ERROR_UTF8_ERR4 + PCRE2_ERROR_UTF8_ERR5 +.sp +The string ends with a truncated UTF-8 character; the code specifies how many +bytes are missing (1 to 5). Although RFC 3629 restricts UTF-8 characters to be +no longer than 4 bytes, the encoding scheme (originally defined by RFC 2279) +allows for up to 6 bytes, and this is checked first; hence the possibility of +4 or 5 missing bytes. +.sp + PCRE2_ERROR_UTF8_ERR6 + PCRE2_ERROR_UTF8_ERR7 + PCRE2_ERROR_UTF8_ERR8 + PCRE2_ERROR_UTF8_ERR9 + PCRE2_ERROR_UTF8_ERR10 +.sp +The two most significant bits of the 2nd, 3rd, 4th, 5th, or 6th byte of the +character do not have the binary value 0b10 (that is, either the most +significant bit is 0, or the next bit is 1). 
+.sp + PCRE2_ERROR_UTF8_ERR11 + PCRE2_ERROR_UTF8_ERR12 +.sp +A character that is valid by the RFC 2279 rules is either 5 or 6 bytes long; +these code points are excluded by RFC 3629. +.sp + PCRE2_ERROR_UTF8_ERR13 +.sp +A 4-byte character has a value greater than 0x10ffff; these code points are +excluded by RFC 3629. +.sp + PCRE2_ERROR_UTF8_ERR14 +.sp +A 3-byte character has a value in the range 0xd800 to 0xdfff; this range of +code points is reserved by RFC 3629 for use with UTF-16, and so is excluded +from UTF-8. +.sp + PCRE2_ERROR_UTF8_ERR15 + PCRE2_ERROR_UTF8_ERR16 + PCRE2_ERROR_UTF8_ERR17 + PCRE2_ERROR_UTF8_ERR18 + PCRE2_ERROR_UTF8_ERR19 +.sp +A 2-, 3-, 4-, 5-, or 6-byte character is "overlong", that is, it codes for a +value that can be represented by fewer bytes, which is invalid. For example, +the two bytes 0xc0, 0xae give the value 0x2e, whose correct coding uses just +one byte. +.sp + PCRE2_ERROR_UTF8_ERR20 +.sp +The two most significant bits of the first byte of a character have the binary +value 0b10 (that is, the most significant bit is 1 and the second is 0). Such a +byte can only validly occur as the second or subsequent byte of a multi-byte +character. +.sp + PCRE2_ERROR_UTF8_ERR21 +.sp +The first byte of a character has the value 0xfe or 0xff. These values can +never occur in a valid UTF-8 string. +. +. +.\" HTML +.SS "Errors in UTF-16 strings" +.rs +.sp +The following negative error codes are given for invalid UTF-16 strings: +.sp + PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at end of string + PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate follows high surrogate + PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate +.sp +. +. +.\" HTML +.SS "Errors in UTF-32 strings" +.rs +.sp +The following negative error codes are given for invalid UTF-32 strings: +.sp + PCRE2_ERROR_UTF32_ERR1 Surrogate character (0xd800 to 0xdfff) + PCRE2_ERROR_UTF32_ERR2 Code point is greater than 0x10ffff +.sp +. +. 
+.\" HTML +.SH "MATCHING IN INVALID UTF STRINGS" +.rs +.sp +You can run pattern matches on subject strings that may contain invalid UTF +sequences if you call \fBpcre2_compile()\fP with the PCRE2_MATCH_INVALID_UTF +option. This is supported by \fBpcre2_match()\fP, including JIT matching, but +not by \fBpcre2_dfa_match()\fP. When PCRE2_MATCH_INVALID_UTF is set, it forces +PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a +valid UTF string. +.P +If you do not set PCRE2_MATCH_INVALID_UTF when calling \fBpcre2_compile\fP, and +you are not certain that your subject strings are valid UTF sequences, you +should not make use of the JIT "fast path" function \fBpcre2_jit_match()\fP +because it bypasses sanity checks, including the one for UTF validity. An +invalid string may cause undefined behaviour, including looping, crashing, or +giving the wrong answer. +.P +Setting PCRE2_MATCH_INVALID_UTF does not affect what \fBpcre2_compile()\fP +generates, but if \fBpcre2_jit_compile()\fP is subsequently called, it does +generate different code. If JIT is not used, the option affects the behaviour +of the interpretive code in \fBpcre2_match()\fP. When PCRE2_MATCH_INVALID_UTF +is set at compile time, PCRE2_NO_UTF_CHECK is ignored at match time. +.P +In this mode, an invalid code unit sequence in the subject never matches any +pattern item. It does not match dot, it does not match \ep{Any}, it does not +even match negative items such as [^X]. A lookbehind assertion fails if it +encounters an invalid sequence while moving the current point backwards. In +other words, an invalid UTF code unit sequence acts as a barrier which no match +can cross. +.P +You can also think of this as the subject being split up into fragments of +valid UTF, delimited internally by invalid code unit sequences. The pattern is +matched fragment by fragment. The result of a successful match, however, is +given as code unit offsets in the entire subject string in the usual way. 
There +are a few points to consider: +.P +The internal boundaries are not interpreted as the beginnings or ends of lines +and so do not match circumflex or dollar characters in the pattern. +.P +If \fBpcre2_match()\fP is called with an offset that points to an invalid +UTF-sequence, that sequence is skipped, and the match starts at the next valid +UTF character, or the end of the subject. +.P +At internal fragment boundaries, \eb and \eB behave in the same way as at the +beginning and end of the subject. For example, a sequence such as \ebWORD\eb +would match an instance of WORD that is surrounded by invalid UTF code units. +.P +Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary +data, knowing that any matched strings that are returned are valid UTF. This +can be useful when searching for UTF text in executable or other binary files. +.P +Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as +sequences of uint16_t or uint32_t code points. They cannot find valid UTF +sequences within an arbitrary string of bytes unless such sequences are +suitably aligned. +. +. +.SH AUTHOR +.rs +.sp +.nf +Philip Hazel +Retired from University Computing Service +Cambridge, England. +.fi +. +. +.SH REVISION +.rs +.sp +.nf +Last updated: 27 November 2024 +Copyright (c) 1997-2024 University of Cambridge. +.fi diff --git a/3rd/pcre2/index.md b/3rd/pcre2/index.md new file mode 100644 index 00000000..d3fff179 --- /dev/null +++ b/3rd/pcre2/index.md @@ -0,0 +1,56 @@ +# PCRE2 - Perl-Compatible Regular Expressions + +The PCRE2 library is a set of C functions that implement regular expression +pattern matching using the same syntax and semantics as Perl 5. PCRE2 has its +own native API, as well as a set of wrapper functions that correspond to the +POSIX regular expression API. The PCRE2 library is free, even for building +proprietary software. 
It comes in three forms, for processing 8-bit, 16-bit, +or 32-bit code units, in either literal or UTF encoding. + +PCRE2 was first released in 2015 to replace the API in the original PCRE +library, which is now obsolete and no longer maintained. As well as a more +flexible API, the code of PCRE2 has been much improved since the fork. + +## Download + +As well as downloading from the +[GitHub site](https://github.com/PCRE2Project/pcre2), you can download PCRE2 +or the older, unmaintained PCRE1 library from an +[*unofficial* mirror](https://sourceforge.net/projects/pcre/files/) at SourceForge. + +You can check out the PCRE2 source code via Git or Subversion: + + git clone https://github.com/PCRE2Project/pcre2.git + svn co https://github.com/PCRE2Project/pcre2.git + +## Contributed Ports + +If you just need the command-line PCRE2 tools on Windows, precompiled binary +versions are available at this +[Rexegg page](http://www.rexegg.com/pcregrep-pcretest.html). + +A PCRE2 port for z/OS, a mainframe operating system which uses EBCDIC as its +default character encoding, can be found at +[http://www.cbttape.org](http://www.cbttape.org/) (File 939). + +## Documentation + +You can read the PCRE2 documentation +[here](https://PCRE2Project.github.io/pcre2/doc/html/index.html). + +Comparisons to Perl's regular expression semantics can be found in the +community authored Wikipedia entry for PCRE. + +There is a curated summary of changes for each PCRE release, copies of +documentation from older releases, and other useful information from the third +party authored +[RexEgg PCRE Documentation and Change Log page](http://www.rexegg.com/pcre-documentation.html). + +## Contact + +To report a problem with the PCRE2 library, or to make a feature request, please +use the PCRE2 GitHub issues tracker. There is a mailing list for discussion of + PCRE2 issues and development at pcre2-dev@googlegroups.com, which is where any +announcements will be made. 
You can browse the +[list archives](https://groups.google.com/g/pcre2-dev). + diff --git a/3rd/pcre2/install-sh b/3rd/pcre2/install-sh new file mode 100644 index 00000000..ec298b53 --- /dev/null +++ b/3rd/pcre2/install-sh @@ -0,0 +1,541 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2020-11-14.01; # UTC + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. +# +# +# FSF changes to this file are in the public domain. 
+# +# Calling this script install-sh is preferred over install.sh, to prevent +# 'make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. + +tab=' ' +nl=' +' +IFS=" $tab$nl" + +# Set DOITPROG to "echo" to test this script. + +doit=${DOITPROG-} +doit_exec=${doit:-exec} + +# Put in absolute file names if you don't have them in your path; +# or use environment vars. + +chgrpprog=${CHGRPPROG-chgrp} +chmodprog=${CHMODPROG-chmod} +chownprog=${CHOWNPROG-chown} +cmpprog=${CMPPROG-cmp} +cpprog=${CPPROG-cp} +mkdirprog=${MKDIRPROG-mkdir} +mvprog=${MVPROG-mv} +rmprog=${RMPROG-rm} +stripprog=${STRIPPROG-strip} + +posix_mkdir= + +# Desired mode of installed file. +mode=0755 + +# Create dirs (including intermediate dirs) using mode 755. +# This is like GNU 'install' as of coreutils 8.32 (2020). +mkdir_umask=22 + +backupsuffix= +chgrpcmd= +chmodcmd=$chmodprog +chowncmd= +mvcmd=$mvprog +rmcmd="$rmprog -f" +stripcmd= + +src= +dst= +dir_arg= +dst_arg= + +copy_on_change=false +is_target_a_directory=possibly + +usage="\ +Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: + --help display this help and exit. + --version display version info and exit. + + -c (ignored) + -C install only if different (preserve data modification time) + -d create directories instead of installing files. + -g GROUP $chgrpprog installed files to GROUP. + -m MODE $chmodprog installed files to MODE. + -o USER $chownprog installed files to USER. + -p pass -p to $cpprog. + -s $stripprog installed files. + -S SUFFIX attempt to back up existing files, with suffix SUFFIX. + -t DIRECTORY install into DIRECTORY. 
+ -T report an error if DSTFILE is a directory. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG + RMPROG STRIPPROG + +By default, rm is invoked with -f; when overridden with RMPROG, +it's up to you to specify -f if you want it. + +If -S is not specified, no backups are attempted. + +Email bug reports to bug-automake@gnu.org. +Automake home page: https://www.gnu.org/software/automake/ +" + +while test $# -ne 0; do + case $1 in + -c) ;; + + -C) copy_on_change=true;; + + -d) dir_arg=true;; + + -g) chgrpcmd="$chgrpprog $2" + shift;; + + --help) echo "$usage"; exit $?;; + + -m) mode=$2 + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; + + -o) chowncmd="$chownprog $2" + shift;; + + -p) cpprog="$cpprog -p";; + + -s) stripcmd=$stripprog;; + + -S) backupsuffix="$2" + shift;; + + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; + + -T) is_target_a_directory=never;; + + --version) echo "$0 $scriptversion"; exit $?;; + + --) shift + break;; + + -*) echo "$0: invalid option: $1" >&2 + exit 1;; + + *) break;; + esac + shift +done + +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + +if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then + # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dst_arg"; then + # $@ is not empty: it contains at least $arg. 
+ set fnord "$@" "$dst_arg" + shift # fnord + fi + shift # arg + dst_arg=$arg + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + done +fi + +if test $# -eq 0; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call 'install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then + do_exit='(exit $ret); exit $ret' + trap "ret=129; $do_exit" 1 + trap "ret=130; $do_exit" 2 + trap "ret=141; $do_exit" 13 + trap "ret=143; $do_exit" 15 + + # Set umask so as not to create temps with too-generous modes. + # However, 'strip' requires both read and write access to temps. + case $mode in + # Optimize common cases. + *644) cp_umask=133;; + *755) cp_umask=22;; + + *[0-7]) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw='% 200' + fi + cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; + *) + if test -z "$stripcmd"; then + u_plus_rw= + else + u_plus_rw=,u+rw + fi + cp_umask=$mode$u_plus_rw;; + esac +fi + +for src +do + # Protect names problematic for 'test' and other utilities. + case $src in + -* | [=\(\)!]) src=./$src;; + esac + + if test -n "$dir_arg"; then + dst=$src + dstdir=$dst + test -d "$dstdir" + dstdir_status=$? + # Don't chown directories that already exist. + if test $dstdir_status = 0; then + chowncmd="" + fi + else + + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! -d "$src"; then + echo "$0: $src does not exist." 
>&2 + exit 1 + fi + + if test -z "$dst_arg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + dst=$dst_arg + + # If destination is a directory, append the input filename. + if test -d "$dst"; then + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 + fi + dstdir=$dst + dstbase=`basename "$src"` + case $dst in + */) dst=$dst$dstbase;; + *) dst=$dst/$dstbase;; + esac + dstdir_status=0 + else + dstdir=`dirname "$dst"` + test -d "$dstdir" + dstdir_status=$? + fi + fi + + case $dstdir in + */) dstdirslash=$dstdir;; + *) dstdirslash=$dstdir/;; + esac + + obsolete_mkdir_used=false + + if test $dstdir_status != 0; then + case $posix_mkdir in + '') + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + # The $RANDOM variable is not portable (e.g., dash). Use it + # here however when possible just to lower collision chance. + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + + trap ' + ret=$? + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null + exit $ret + ' 0 + + # Because "mkdir -p" follows existing symlinks and we likely work + # directly in world-writeable /tmp, make sure that the '$tmpdir' + # directory is successfully created first before we actually test + # 'mkdir -p'. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
+ test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. + rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac + + if + $posix_mkdir && ( + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + ) + then : + else + + # mkdir does not conform to POSIX, + # or it failed possibly due to a race condition. Create the + # directory the slow way, step by step, checking for races as we go. + + case $dstdir in + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; + esac + + oIFS=$IFS + IFS=/ + set -f + set fnord $dstdir + shift + set +f + IFS=$oIFS + + prefixes= + + for d + do + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ + done + + if test -n "$prefixes"; then + # Don't fail if two instances are running concurrently. 
+ (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true + fi + fi + fi + + if test -n "$dir_arg"; then + { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && + { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || + test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 + else + + # Make a couple of temp file names in the proper directory. + dsttmp=${dstdirslash}_inst.$$_ + rmtmp=${dstdirslash}_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + + # Copy the file name to the temp name. + (umask $cp_umask && + { test -z "$stripcmd" || { + # Create $dsttmp read-write so that cp doesn't create it read-only, + # which would cause strip to fail. + if test -z "$doit"; then + : >"$dsttmp" # No need to fork-exec 'touch'. + else + $doit touch "$dsttmp" + fi + } + } && + $doit_exec $cpprog "$src" "$dsttmp") && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && + { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && + { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && + { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && + + # If -C, don't bother to copy if it wouldn't change the file. 
+ if $copy_on_change && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && + set X $old && old=:$2:$4:$5:$6 && + set X $new && new=:$2:$4:$5:$6 && + set +f && + test "$old" = "$new" && + $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 + then + rm -f "$dsttmp" + else + # If $backupsuffix is set, and the file being installed + # already exists, attempt a backup. Don't worry if it fails, + # e.g., if mv doesn't support -f. + if test -n "$backupsuffix" && test -f "$dst"; then + $doit $mvcmd -f "$dst" "$dst$backupsuffix" 2>/dev/null + fi + + # Rename the file to the real destination. + $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || + + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + { + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! -f "$dst" || + $doit $rmcmd "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. 
+ $doit $mvcmd "$dsttmp" "$dst" + } + fi || exit 1 + + trap '' 0 + fi +done + +# Local variables: +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-time-zone: "UTC0" +# time-stamp-end: "; # UTC" +# End: diff --git a/3rd/pcre2/libpcre2-16.pc.in b/3rd/pcre2/libpcre2-16.pc.in new file mode 100644 index 00000000..bacb4665 --- /dev/null +++ b/3rd/pcre2/libpcre2-16.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-16 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 16 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-16@LIB_POSTFIX@ +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/3rd/pcre2/libpcre2-32.pc.in b/3rd/pcre2/libpcre2-32.pc.in new file mode 100644 index 00000000..06241f06 --- /dev/null +++ b/3rd/pcre2/libpcre2-32.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-32 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 32 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-32@LIB_POSTFIX@ +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/3rd/pcre2/libpcre2-8.pc.in b/3rd/pcre2/libpcre2-8.pc.in new file mode 100644 index 00000000..246bb9ea --- /dev/null +++ b/3rd/pcre2/libpcre2-8.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-8 +Description: PCRE2 - Perl compatible regular expressions C library (2nd API) with 8 bit character support +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} 
-lpcre2-8@LIB_POSTFIX@ +Libs.private: @PTHREAD_CFLAGS@ @PTHREAD_LIBS@ +Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@ diff --git a/3rd/pcre2/libpcre2-posix.pc.in b/3rd/pcre2/libpcre2-posix.pc.in new file mode 100644 index 00000000..2f1e8f10 --- /dev/null +++ b/3rd/pcre2/libpcre2-posix.pc.in @@ -0,0 +1,13 @@ +# Package Information for pkg-config + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libpcre2-posix +Description: Posix compatible interface to libpcre2-8 +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@ +Cflags: -I${includedir} @PCRE2POSIX_CFLAG@ +Requires.private: libpcre2-8 diff --git a/3rd/pcre2/ltmain.sh b/3rd/pcre2/ltmain.sh new file mode 100644 index 00000000..977e5237 --- /dev/null +++ b/3rd/pcre2/ltmain.sh @@ -0,0 +1,11436 @@ +#! /usr/bin/env sh +## DO NOT EDIT - This file generated from ./build-aux/ltmain.in +## by inline-source v2019-02-19.15 + +# libtool (GNU libtool) 2.4.7 +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit , 1996 + +# Copyright (C) 1996-2019, 2021-2022 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# As a special exception to the GNU General Public License, +# if you distribute this file as part of a program or library that +# is built using GNU Libtool, you may include this file under the +# same distribution terms that you use for the rest of that program. 
+# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +PROGRAM=libtool +PACKAGE=libtool +VERSION="2.4.7 Debian-2.4.7-7build1" +package_revision=2.4.7 + + +## ------ ## +## Usage. ## +## ------ ## + +# Run './libtool --help' for help with using this script from the +# command line. + + +## ------------------------------- ## +## User overridable command paths. ## +## ------------------------------- ## + +# After configure completes, it has a better idea of some of the +# shell tools we need than the defaults used by the functions shared +# with bootstrap, so set those here where they can still be over- +# ridden by the user, but otherwise take precedence. + +: ${AUTOCONF="autoconf"} +: ${AUTOMAKE="automake"} + + +## -------------------------- ## +## Source external libraries. ## +## -------------------------- ## + +# Much of our low-level functionality needs to be sourced from external +# libraries, which are installed to $pkgauxdir. + +# Set a version string for this script. +scriptversion=2019-02-19.15; # UTC + +# General shell script boiler plate, and helper functions. +# Written by Gary V. Vaughan, 2004 + +# This is free software. There is NO warranty; not even for +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Copyright (C) 2004-2019, 2021 Bootstrap Authors +# +# This file is dual licensed under the terms of the MIT license +# , and GPL version 2 or later +# . You must apply one of +# these licenses when using or redistributing this software or any of +# the files within it. See the URLs above, or the file `LICENSE` +# included in the Bootstrap distribution for the full license texts. 
+ +# Please report bugs or propose patches to: +# + + +## ------ ## +## Usage. ## +## ------ ## + +# Evaluate this file near the top of your script to gain access to +# the functions and variables defined here: +# +# . `echo "$0" | ${SED-sed} 's|[^/]*$||'`/build-aux/funclib.sh +# +# If you need to override any of the default environment variable +# settings, do that before evaluating this file. + + +## -------------------- ## +## Shell normalisation. ## +## -------------------- ## + +# Some shells need a little help to be as Bourne compatible as possible. +# Before doing anything else, make sure all that help has been provided! + +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac +fi + +# NLS nuisances: We save the old values in case they are required later. +_G_user_locale= +_G_safe_locale= +for _G_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES +do + eval "if test set = \"\${$_G_var+set}\"; then + save_$_G_var=\$$_G_var + $_G_var=C + export $_G_var + _G_user_locale=\"$_G_var=\\\$save_\$_G_var; \$_G_user_locale\" + _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\" + fi" +done +# These NLS vars are set unconditionally (bootstrap issue #24). Unset those +# in case the environment reset is needed later and the $save_* variant is not +# defined (see the code above). +LC_ALL=C +LANGUAGE=C +export LANGUAGE LC_ALL + +# Make sure IFS has a sensible default +sp=' ' +nl=' +' +IFS="$sp $nl" + +# There are apparently some retarded systems that use ';' as a PATH separator! 
# Decide which character separates the entries of $PATH.  Unless the
# caller already set PATH_SEPARATOR, assume ':' and switch to ';' only
# when a ';'-separated PATH works while a ':'-separated one does not.
if test "${PATH_SEPARATOR+set}" != set; then
  PATH_SEPARATOR=:
  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
      PATH_SEPARATOR=';'
  }
fi


# func_unset VAR
# --------------
# Portably unset VAR.
# In some shells, an 'unset VAR' statement leaves a non-zero return
# status if VAR is already unset, which might be problematic if the
# statement is used at the end of a function (thus poisoning its return
# value) or when 'set -e' is active (causing even a spurious abort of
# the script in this case).
func_unset ()
{
    # Assign first so VAR certainly exists, try the unset in a subshell,
    # and only repeat it in this shell when that trial succeeded.  The
    # trailing ': ' guarantees a zero exit status.
    { eval $1=; (eval unset $1) >/dev/null 2>&1 && eval unset $1 || : ; }
}


# Make sure CDPATH doesn't cause `cd` commands to output the target dir.
func_unset CDPATH

# Make sure ${,E,F}GREP behave sanely.
func_unset GREP_OPTIONS


## ------------------------- ##
## Locate command utilities. ##
## ------------------------- ##


# func_executable_p FILE
# ----------------------
# Check that FILE is an executable regular file.
func_executable_p ()
{
    test -f "$1" && test -x "$1"
}


# func_path_progs PROGS_LIST CHECK_FUNC [PATH]
# --------------------------------------------
# Search for either a program that responds to --version with output
# containing "GNU", or else returned by CHECK_FUNC otherwise, by
# trying all the directories in PATH with each of the elements of
# PROGS_LIST.
#
# CHECK_FUNC should accept the path to a candidate program, and
# set $func_check_prog_result if it truncates its output less than
# $_G_path_prog_max characters.
#
# The winner is left in $func_path_progs_result.  If no candidate is
# acceptable, a diagnostic naming PROGS_LIST is written to standard
# error and the shell exits with status 1.
func_path_progs ()
{
    _G_progs_list=$1
    _G_check_func=$2
    _G_PATH=${3-"$PATH"}

    # Start from a clean slate, so that a result left behind by an
    # earlier search (say, for sed) can neither be reported as the
    # outcome of this one nor mask the "nothing found" diagnostic.
    func_path_progs_result=
    func_check_prog_result=

    _G_path_prog_max=0
    _G_path_prog_found=false
    _G_save_IFS=$IFS; IFS=${PATH_SEPARATOR-:}
    for _G_dir in $_G_PATH; do
      IFS=$_G_save_IFS
      # An empty PATH element stands for the current directory.
      test -z "$_G_dir" && _G_dir=.
      for _G_prog_name in $_G_progs_list; do
        for _exeext in '' .EXE; do
          _G_path_prog=$_G_dir/$_G_prog_name$_exeext
          func_executable_p "$_G_path_prog" || continue
          case `"$_G_path_prog" --version 2>&1` in
            *GNU*) func_path_progs_result=$_G_path_prog _G_path_prog_found=: ;;
            *)     # Quote the candidate: directories may contain blanks.
                   $_G_check_func "$_G_path_prog"
                   func_path_progs_result=$func_check_prog_result
                   ;;
          esac
          # A GNU implementation is good enough; stop all three loops.
          $_G_path_prog_found && break 3
        done
      done
    done
    IFS=$_G_save_IFS
    if test -z "$func_path_progs_result"; then
      echo "no acceptable program among '$_G_progs_list' could be found in \$PATH" >&2
      exit 1
    fi
}


# We want to be able to use the functions in this file before configure
# has figured out where the best binaries are kept, which means we have
# to search for them ourselves - except when the results are already set
# where we skip the searches.

# Unless the user overrides by setting SED, search the path for either GNU
# sed, or the sed that truncates its output the least.
test -z "$SED" && {
  # Build a long script (the same substitution on up to 99 lines) that
  # never matches the digit-only test input, so a working sed must copy
  # its input through unchanged.
  _G_sed_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
  for _G_i in 1 2 3 4 5 6 7; do
    _G_sed_script=$_G_sed_script$nl$_G_sed_script
  done
  echo "$_G_sed_script" 2>/dev/null | sed 99q >conftest.sed
  _G_sed_script=

  # func_check_prog_sed CANDIDATE
  # Feed CANDIDATE ever longer single-line inputs; each time it copies
  # one through intact and beats $_G_path_prog_max, record it in
  # $func_check_prog_result.
  func_check_prog_sed ()
  {
    _G_path_prog=$1

    _G_count=0
    printf 0123456789 >conftest.in
    while :
    do
      # Double the input, then append the final newline sed requires.
      cat conftest.in conftest.in >conftest.tmp
      mv conftest.tmp conftest.in
      cp conftest.in conftest.nl
      echo '' >> conftest.nl
      # The candidate must read the sample on stdin and write its copy
      # to conftest.out, which is then compared with the sample.
      "$_G_path_prog" -f conftest.sed < conftest.nl > conftest.out 2>/dev/null || break
      diff conftest.out conftest.nl >/dev/null 2>&1 || break
      _G_count=`expr $_G_count + 1`
      if test "$_G_count" -gt "$_G_path_prog_max"; then
        # Best one so far, save it but keep looking for a better one
        func_check_prog_result=$_G_path_prog
        _G_path_prog_max=$_G_count
      fi
      # 10*(2^10) chars as input seems more than enough
      test 10 -lt "$_G_count" && break
    done
    rm -f conftest.in conftest.tmp conftest.nl conftest.out
  }

  func_path_progs "sed gsed" func_check_prog_sed "$PATH:/usr/xpg4/bin"
  rm -f conftest.sed
  SED=$func_path_progs_result
}


# Unless the user overrides by setting GREP, search the path for either GNU
# grep, or the grep that truncates its output the least.
test -z "$GREP" && {
  # func_check_prog_grep CANDIDATE
  # Same doubling test as for sed: the last line of the sample is
  # 'GREP', so a working grep must echo the whole sample back.
  func_check_prog_grep ()
  {
    _G_path_prog=$1

    # $_G_path_prog_max is deliberately NOT reset here: func_path_progs
    # initialises it once per search, so that candidates are compared
    # against the best one seen so far rather than against zero.
    _G_count=0
    printf 0123456789 >conftest.in
    while :
    do
      cat conftest.in conftest.in >conftest.tmp
      mv conftest.tmp conftest.in
      cp conftest.in conftest.nl
      echo 'GREP' >> conftest.nl
      "$_G_path_prog" -e 'GREP$' -e '-(cannot match)-' < conftest.nl > conftest.out 2>/dev/null || break
      diff conftest.out conftest.nl >/dev/null 2>&1 || break
      _G_count=`expr $_G_count + 1`
      if test "$_G_count" -gt "$_G_path_prog_max"; then
        # Best one so far, save it but keep looking for a better one
        func_check_prog_result=$_G_path_prog
        _G_path_prog_max=$_G_count
      fi
      # 10*(2^10) chars as input seems more than enough
      test 10 -lt "$_G_count" && break
    done
    rm -f conftest.in conftest.tmp conftest.nl conftest.out
  }

  func_path_progs "grep ggrep" func_check_prog_grep "$PATH:/usr/xpg4/bin"
  GREP=$func_path_progs_result
}


## ------------------------------- ##
## User overridable command paths. ##
## ------------------------------- ##

# All uppercase variable names are used for environment variables.  These
# variables can be overridden by the user before calling a script that
# uses them if a suitable command of that name is not already available
# in the command search PATH.

: ${CP="cp -f"}
: ${ECHO="printf %s\n"}
: ${EGREP="$GREP -E"}
: ${FGREP="$GREP -F"}
: ${LN_S="ln -s"}
: ${MAKE="make"}
: ${MKDIR="mkdir"}
: ${MV="mv -f"}
: ${RM="rm -f"}
: ${SHELL="${CONFIG_SHELL-/bin/sh}"}


## -------------------- ##
## Useful sed snippets. ##
## -------------------- ##

sed_dirname='s|/[^/]*$||'
sed_basename='s|^.*/||'

# Sed substitution that helps us do robust quoting.
# It backslashifies
# metacharacters that are still active within double-quoted strings.
# (Backquote, double quote, dollar and backslash each get a '\' prefix.)
sed_quote_subst='s|\([`"$\\]\)|\\\1|g'

# Same as above, but do not quote variable references.
# (That is, '$' is left alone; only '"', '`' and '\' are escaped.)
sed_double_quote_subst='s/\(["`\\]\)/\\\1/g'

# Sed substitution that turns a string into a regex matching for the
# string literally.
sed_make_literal_regex='s|[].[^$\\*\/]|\\&|g'

# Sed substitution that converts a w32 file name or path
# that contains forward slashes, into one that contains
# (escaped) backslashes.  A very naive implementation.
# Three passes: collapse runs of backslashes to one, turn every '/'
# into a backslash, then double every backslash.
sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'

# Re-'\' parameter expansions in output of sed_double_quote_subst that
# were '\'-ed in input to the same.  If an odd number of '\' preceded a
# '$' in input to sed_double_quote_subst, that '$' was protected from
# expansion.  Since each input '\' is now two '\'s, look for any number
# of runs of four '\'s followed by two '\'s and then a '$'.  '\' that '$'.
#
# The helper variables below hold sed regex text matching one, two and
# four literal backslashes and a literal dollar sign respectively.
_G_bs='\\'
_G_bs2='\\\\'
_G_bs4='\\\\\\\\'
_G_dollar='\$'
# The script first inserts a newline after every run of four
# backslashes (so such runs cannot be mistaken for the final pair),
# escapes a '$' preceded by exactly two backslashes, and finally
# removes the inserted newlines again.
sed_double_backslash="\
  s/$_G_bs4/&\\
/g
  s/^$_G_bs2$_G_dollar/$_G_bs&/
  s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g
  s/\n//g"

# require_check_ifs_backslash
# ---------------------------
# Check if we can use backslash as IFS='\' separator, and set
# $check_ifs_backshlash_broken to ':' or 'false'.
# (The variable name really is spelled 'backshlash' throughout; keep
# that spelling when referring to it.)
#
# The probe splits the string 'a\\b' on IFS='\'.  Seeing the field 'a'
# first marks the shell as working ('false'); any first field other
# than 'a' or the empty string marks it as broken (':').  Once run, the
# 'require_' variable is set to ':' so the probe is not repeated.
require_check_ifs_backslash=func_require_check_ifs_backslash
func_require_check_ifs_backslash ()
{
  _G_save_IFS=$IFS
  IFS='\'
  _G_check_ifs_backshlash='a\\b'
  for _G_i in $_G_check_ifs_backshlash
  do
  case $_G_i in
  a)
    # Splitting worked; keep going so the next (empty) field ends the loop.
    check_ifs_backshlash_broken=false
    ;;
  '')
    break
    ;;
  *)
    # The string was not split on the backslash as expected.
    check_ifs_backshlash_broken=:
    break
    ;;
  esac
  done
  IFS=$_G_save_IFS
  require_check_ifs_backslash=:
}


## ----------------- ##
## Global variables.
## +## ----------------- ## + +# Except for the global variables explicitly listed below, the following +# functions in the '^func_' namespace, and the '^require_' namespace +# variables initialised in the 'Resource management' section, sourcing +# this file will not pollute your global namespace with anything +# else. There's no portable way to scope variables in Bourne shell +# though, so actually running these functions will sometimes place +# results into a variable named after the function, and often use +# temporary variables in the '^_G_' namespace. If you are careful to +# avoid using those namespaces casually in your sourcing script, things +# should continue to work as you expect. And, of course, you can freely +# overwrite any of the functions or variables defined here before +# calling anything to customize them. + +EXIT_SUCCESS=0 +EXIT_FAILURE=1 +EXIT_MISMATCH=63 # $? = 63 is used to indicate version mismatch to missing. +EXIT_SKIP=77 # $? = 77 is used to indicate a skipped test to automake. + +# Allow overriding, eg assuming that you follow the convention of +# putting '$debug_cmd' at the start of all your functions, you can get +# bash to show function call trace with: +# +# debug_cmd='echo "${FUNCNAME[0]} $*" >&2' bash your-script-name +debug_cmd=${debug_cmd-":"} +exit_cmd=: + +# By convention, finish your script with: +# +# exit $exit_status +# +# so that you can set exit_status to non-zero if you want to indicate +# something went wrong during execution without actually bailing out at +# the point of failure. +exit_status=$EXIT_SUCCESS + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. +progpath=$0 + +# The name of this program. 
progname=`$ECHO "$progpath" |$SED "$sed_basename"`

# Make sure we have an absolute progpath for reexecution:
case $progpath in
  [\\/]*|[A-Za-z]:\\*)
     # Already absolute (POSIX style, or drive-letter style).
     ;;
  *[\\/]*)
     # Relative path with a directory part: resolve that directory.
     progdir=`$ECHO "$progpath" |$SED "$sed_dirname"`
     progdir=`cd "$progdir" && pwd`
     progpath=$progdir/$progname
     ;;
  *)
     # Bare command name: look it up in $PATH.  Remember whether the
     # lookup succeeded; otherwise the loop variable would be left
     # holding the last PATH element and progpath would point at a
     # file that was never found.
     _G_IFS=$IFS
     IFS=${PATH_SEPARATOR-:}
     _G_progdir_found=false
     for progdir in $PATH; do
       IFS=$_G_IFS
       # An empty PATH element stands for the current directory.
       test -n "$progdir" || progdir=.
       test -x "$progdir/$progname" && { _G_progdir_found=:; break; }
     done
     IFS=$_G_IFS
     # Not found anywhere (or PATH empty): assume the current directory.
     $_G_progdir_found || progdir=.
     case $progdir in
       .) progdir=`pwd` ;;
     esac
     progpath=$progdir/$progname
     ;;
esac


## ----------------- ##
## Standard options. ##
## ----------------- ##

# The following options affect the operation of the functions defined
# below, and should be set appropriately depending on run-time para-
# meters passed on the command line.

opt_dry_run=false
opt_quiet=false
opt_verbose=false

# Categories 'all' and 'none' are always available.  Append any others
# you will pass as the first argument to func_warning from your own
# code.
warning_categories=

# By default, display warnings according to 'opt_warning_types'.  Set
# 'warning_func'  to ':' to elide all warnings, or func_fatal_error to
# treat the next displayed warning as a fatal error.
warning_func=func_warn_and_continue

# Set to 'all' to display all warnings, 'none' to suppress all
# warnings, or a space delimited list of some subset of
# 'warning_categories' to display only the listed warnings.
opt_warning_types=all


## -------------------- ##
## Resource management. ##
## -------------------- ##

# This section contains definitions for functions that each ensure a
# particular resource (a file, or a non-empty configuration variable for
# example) is available, and if appropriate to extract default values
# from pertinent package files.  Call them using their associated
# 'require_*' variable to ensure that they are executed, at most, once.
#
# It's entirely deliberate that calling these functions can set
# variables that don't obey the namespace limitations obeyed by the rest
# of this file, in order that they be as useful as possible to
# callers.


# require_term_colors
# -------------------
# Allow display of bold text on terminals that support it.
#
# When standard output is a terminal, set tc_reset, tc_bold,
# tc_standout, tc_red, tc_green, tc_blue and tc_cyan to the matching
# control sequences; variables for unsupported attributes are left
# untouched.  When standard output is not a terminal nothing is set.
# Either way the 'require_' variable becomes ':' so the work is done at
# most once.
require_term_colors=func_require_term_colors
func_require_term_colors ()
{
    $debug_cmd

    test -t 1 && {
      # COLORTERM and USE_ANSI_COLORS environment variables take
      # precedence, because most terminfo databases neglect to describe
      # whether color sequences are supported.
      test -n "${COLORTERM+set}" && : ${USE_ANSI_COLORS="1"}

      if test 1 = "$USE_ANSI_COLORS"; then
        # Standard ANSI escape sequences.  The ESC byte is produced with
        # printf so that no raw control character has to live in this
        # file, where editors and transports tend to strip it.
        _G_esc=`printf '\033'`
        tc_reset="${_G_esc}[0m"
        tc_bold="${_G_esc}[1m";  tc_standout="${_G_esc}[7m"
        tc_red="${_G_esc}[31m";  tc_green="${_G_esc}[32m"
        tc_blue="${_G_esc}[34m"; tc_cyan="${_G_esc}[36m"
      else
        # Otherwise trust the terminfo database after all.
        test -n "`tput sgr0 2>/dev/null`" && {
          tc_reset=`tput sgr0`
          test -n "`tput bold 2>/dev/null`" && tc_bold=`tput bold`
          # Standout falls back to bold when the terminal lacks 'smso'.
          tc_standout=$tc_bold
          test -n "`tput smso 2>/dev/null`" && tc_standout=`tput smso`
          test -n "`tput setaf 1 2>/dev/null`" && tc_red=`tput setaf 1`
          test -n "`tput setaf 2 2>/dev/null`" && tc_green=`tput setaf 2`
          test -n "`tput setaf 4 2>/dev/null`" && tc_blue=`tput setaf 4`
          # ANSI colour 6 is cyan (5 would be magenta).
          test -n "`tput setaf 6 2>/dev/null`" && tc_cyan=`tput setaf 6`
        }
      fi
    }

    require_term_colors=:
}


## ----------------- ##
## Function library. ##
## ----------------- ##

# This section contains a variety of useful functions to call in your
# scripts.  Take note of the portable wrappers for features provided by
# some modern shells, which will fall back to slower equivalents on
# less featureful shells.


# func_append VAR VALUE
# ---------------------
# Append VALUE onto the existing contents of VAR.

  # _G_HAVE_PLUSEQ_OP
  # Can be empty, in which case the shell is probed, "yes" if += is
  # useable or anything else if it does not work.
+ if test -z "$_G_HAVE_PLUSEQ_OP" && \ + __PLUSEQ_TEST="a" && \ + __PLUSEQ_TEST+=" b" 2>/dev/null && \ + test "a b" = "$__PLUSEQ_TEST"; then + _G_HAVE_PLUSEQ_OP=yes + fi + +if test yes = "$_G_HAVE_PLUSEQ_OP" +then + # This is an XSI compatible shell, allowing a faster implementation... + eval 'func_append () + { + $debug_cmd + + eval "$1+=\$2" + }' +else + # ...otherwise fall back to using expr, which is often a shell builtin. + func_append () + { + $debug_cmd + + eval "$1=\$$1\$2" + } +fi + + +# func_append_quoted VAR VALUE +# ---------------------------- +# Quote VALUE and append to the end of shell variable VAR, separated +# by a space. +if test yes = "$_G_HAVE_PLUSEQ_OP"; then + eval 'func_append_quoted () + { + $debug_cmd + + func_quote_arg pretty "$2" + eval "$1+=\\ \$func_quote_arg_result" + }' +else + func_append_quoted () + { + $debug_cmd + + func_quote_arg pretty "$2" + eval "$1=\$$1\\ \$func_quote_arg_result" + } +fi + + +# func_append_uniq VAR VALUE +# -------------------------- +# Append unique VALUE onto the existing contents of VAR, assuming +# entries are delimited by the first character of VALUE. For example: +# +# func_append_uniq options " --another-option option-argument" +# +# will only append to $options if " --another-option option-argument " +# is not already present somewhere in $options already (note spaces at +# each end implied by leading space in second argument). +func_append_uniq () +{ + $debug_cmd + + eval _G_current_value='`$ECHO $'$1'`' + _G_delim=`expr "$2" : '\(.\)'` + + case $_G_delim$_G_current_value$_G_delim in + *"$2$_G_delim"*) ;; + *) func_append "$@" ;; + esac +} + + +# func_arith TERM... +# ------------------ +# Set func_arith_result to the result of evaluating TERMs. 
+ test -z "$_G_HAVE_ARITH_OP" \ + && (eval 'test 2 = $(( 1 + 1 ))') 2>/dev/null \ + && _G_HAVE_ARITH_OP=yes + +if test yes = "$_G_HAVE_ARITH_OP"; then + eval 'func_arith () + { + $debug_cmd + + func_arith_result=$(( $* )) + }' +else + func_arith () + { + $debug_cmd + + func_arith_result=`expr "$@"` + } +fi + + +# func_basename FILE +# ------------------ +# Set func_basename_result to FILE with everything up to and including +# the last / stripped. +if test yes = "$_G_HAVE_XSI_OPS"; then + # If this shell supports suffix pattern removal, then use it to avoid + # forking. Hide the definitions single quotes in case the shell chokes + # on unsupported syntax... + _b='func_basename_result=${1##*/}' + _d='case $1 in + */*) func_dirname_result=${1%/*}$2 ;; + * ) func_dirname_result=$3 ;; + esac' + +else + # ...otherwise fall back to using sed. + _b='func_basename_result=`$ECHO "$1" |$SED "$sed_basename"`' + _d='func_dirname_result=`$ECHO "$1" |$SED "$sed_dirname"` + if test "X$func_dirname_result" = "X$1"; then + func_dirname_result=$3 + else + func_append func_dirname_result "$2" + fi' +fi + +eval 'func_basename () +{ + $debug_cmd + + '"$_b"' +}' + + +# func_dirname FILE APPEND NONDIR_REPLACEMENT +# ------------------------------------------- +# Compute the dirname of FILE. If nonempty, add APPEND to the result, +# otherwise set result to NONDIR_REPLACEMENT. +eval 'func_dirname () +{ + $debug_cmd + + '"$_d"' +}' + + +# func_dirname_and_basename FILE APPEND NONDIR_REPLACEMENT +# -------------------------------------------------------- +# Perform func_basename and func_dirname in a single function +# call: +# dirname: Compute the dirname of FILE. If nonempty, +# add APPEND to the result, otherwise set result +# to NONDIR_REPLACEMENT. +# value returned in "$func_dirname_result" +# basename: Compute filename of FILE. +# value retuned in "$func_basename_result" +# For efficiency, we do not delegate to the functions above but instead +# duplicate the functionality here. 
# The body is assembled from the shell snippets held in $_b (basename)
# and $_d (dirname); it sets both $func_basename_result and
# $func_dirname_result.
eval 'func_dirname_and_basename ()
{
    $debug_cmd

    '"$_b"'
    '"$_d"'
}'


# func_echo ARG...
# ----------------
# Echo program name prefixed message.
# The arguments are joined with spaces; every line of the resulting
# message is printed on its own, each prefixed with "$progname: ".
func_echo ()
{
    $debug_cmd

    _G_message=$*

    # Split the message on newlines only; IFS is restored inside the
    # loop so that $ECHO runs with the caller's word splitting rules.
    func_echo_IFS=$IFS
    IFS=$nl
    for _G_line in $_G_message; do
      IFS=$func_echo_IFS
      $ECHO "$progname: $_G_line"
    done
    IFS=$func_echo_IFS
}


# func_echo_all ARG...
# --------------------
# Invoke $ECHO with all args, space-separated.
func_echo_all ()
{
    $ECHO "$*"
}


# func_echo_infix_1 INFIX ARG...
# ------------------------------
# Echo program name, followed by INFIX on the first line, with any
# additional lines not showing INFIX.
# Continuation lines are indented so that their text lines up with the
# text of the first line.  All output goes to standard error.
func_echo_infix_1 ()
{
    $debug_cmd

    $require_term_colors

    _G_infix=$1; shift
    _G_indent=$_G_infix
    _G_prefix="$progname: $_G_infix: "
    _G_message=$*

    # Strip color escape sequences before counting printable length
    for _G_tc in "$tc_reset" "$tc_bold" "$tc_standout" "$tc_red" "$tc_green" "$tc_blue" "$tc_cyan"
    do
      test -n "$_G_tc" && {
        # Turn the sequence into a literal regex, then delete it.
        _G_esc_tc=`$ECHO "$_G_tc" | $SED "$sed_make_literal_regex"`
        _G_indent=`$ECHO "$_G_indent" | $SED "s|$_G_esc_tc||g"`
      }
    done
    # Replace every remaining (printable) character of INFIX by a blank.
    _G_indent="$progname: "`echo "$_G_indent" | $SED 's|.| |g'`"  " ## exclude from sc_prohibit_nested_quotes

    func_echo_infix_1_IFS=$IFS
    IFS=$nl
    for _G_line in $_G_message; do
      IFS=$func_echo_infix_1_IFS
      $ECHO "$_G_prefix$tc_bold$_G_line$tc_reset" >&2
      # Only the first line carries INFIX; later ones get the indent.
      _G_prefix=$_G_indent
    done
    IFS=$func_echo_infix_1_IFS
}


# func_error ARG...
# -----------------
# Echo program name prefixed message to standard error.
# The message is tagged with a (possibly coloured) "error" infix.
func_error ()
{
    $debug_cmd

    $require_term_colors

    func_echo_infix_1 "  $tc_standout${tc_red}error$tc_reset" "$*" >&2
}


# func_fatal_error ARG...
# -----------------------
# Echo program name prefixed message to standard error, and exit.
+func_fatal_error () +{ + $debug_cmd + + func_error "$*" + exit $EXIT_FAILURE +} + + +# func_grep EXPRESSION FILENAME +# ----------------------------- +# Check whether EXPRESSION matches any line of FILENAME, without output. +func_grep () +{ + $debug_cmd + + $GREP "$1" "$2" >/dev/null 2>&1 +} + + +# func_len STRING +# --------------- +# Set func_len_result to the length of STRING. STRING may not +# start with a hyphen. + test -z "$_G_HAVE_XSI_OPS" \ + && (eval 'x=a/b/c; + test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \ + && _G_HAVE_XSI_OPS=yes + +if test yes = "$_G_HAVE_XSI_OPS"; then + eval 'func_len () + { + $debug_cmd + + func_len_result=${#1} + }' +else + func_len () + { + $debug_cmd + + func_len_result=`expr "$1" : ".*" 2>/dev/null || echo $max_cmd_len` + } +fi + + +# func_mkdir_p DIRECTORY-PATH +# --------------------------- +# Make sure the entire path to DIRECTORY-PATH is available. +func_mkdir_p () +{ + $debug_cmd + + _G_directory_path=$1 + _G_dir_list= + + if test -n "$_G_directory_path" && test : != "$opt_dry_run"; then + + # Protect directory names starting with '-' + case $_G_directory_path in + -*) _G_directory_path=./$_G_directory_path ;; + esac + + # While some portion of DIR does not yet exist... + while test ! -d "$_G_directory_path"; do + # ...make a list in topmost first order. Use a colon delimited + # list incase some portion of path contains whitespace. + _G_dir_list=$_G_directory_path:$_G_dir_list + + # If the last portion added has no slash in it, the list is done + case $_G_directory_path in */*) ;; *) break ;; esac + + # ...otherwise throw away the child directory and loop + _G_directory_path=`$ECHO "$_G_directory_path" | $SED -e "$sed_dirname"` + done + _G_dir_list=`$ECHO "$_G_dir_list" | $SED 's|:*$||'` + + func_mkdir_p_IFS=$IFS; IFS=: + for _G_dir in $_G_dir_list; do + IFS=$func_mkdir_p_IFS + # mkdir can fail with a 'File exist' error if two processes + # try to create one of the directories concurrently. 
Don't + # stop in that case! + $MKDIR "$_G_dir" 2>/dev/null || : + done + IFS=$func_mkdir_p_IFS + + # Bail out if we (or some other process) failed to create a directory. + test -d "$_G_directory_path" || \ + func_fatal_error "Failed to create '$1'" + fi +} + + +# func_mktempdir [BASENAME] +# ------------------------- +# Make a temporary directory that won't clash with other running +# libtool processes, and avoids race conditions if possible. If +# given, BASENAME is the basename for that directory. +func_mktempdir () +{ + $debug_cmd + + _G_template=${TMPDIR-/tmp}/${1-$progname} + + if test : = "$opt_dry_run"; then + # Return a directory name, but don't create it in dry-run mode + _G_tmpdir=$_G_template-$$ + else + + # If mktemp works, use that first and foremost + _G_tmpdir=`mktemp -d "$_G_template-XXXXXXXX" 2>/dev/null` + + if test ! -d "$_G_tmpdir"; then + # Failing that, at least try and use $RANDOM to avoid a race + _G_tmpdir=$_G_template-${RANDOM-0}$$ + + func_mktempdir_umask=`umask` + umask 0077 + $MKDIR "$_G_tmpdir" + umask $func_mktempdir_umask + fi + + # If we're not in dry-run mode, bomb out on failure + test -d "$_G_tmpdir" || \ + func_fatal_error "cannot create temporary directory '$_G_tmpdir'" + fi + + $ECHO "$_G_tmpdir" +} + + +# func_normal_abspath PATH +# ------------------------ +# Remove doubled-up and trailing slashes, "." path components, +# and cancel out any ".." path components in PATH after making +# it an absolute path. +func_normal_abspath () +{ + $debug_cmd + + # These SED scripts presuppose an absolute path with a trailing slash. + _G_pathcar='s|^/\([^/]*\).*$|\1|' + _G_pathcdr='s|^/[^/]*||' + _G_removedotparts=':dotsl + s|/\./|/|g + t dotsl + s|/\.$|/|' + _G_collapseslashes='s|/\{1,\}|/|g' + _G_finalslash='s|/*$|/|' + + # Start from root dir and reassemble the path. 
+ func_normal_abspath_result= + func_normal_abspath_tpath=$1 + func_normal_abspath_altnamespace= + case $func_normal_abspath_tpath in + "") + # Empty path, that just means $cwd. + func_stripname '' '/' "`pwd`" + func_normal_abspath_result=$func_stripname_result + return + ;; + # The next three entries are used to spot a run of precisely + # two leading slashes without using negated character classes; + # we take advantage of case's first-match behaviour. + ///*) + # Unusual form of absolute path, do nothing. + ;; + //*) + # Not necessarily an ordinary path; POSIX reserves leading '//' + # and for example Cygwin uses it to access remote file shares + # over CIFS/SMB, so we conserve a leading double slash if found. + func_normal_abspath_altnamespace=/ + ;; + /*) + # Absolute path, do nothing. + ;; + *) + # Relative path, prepend $cwd. + func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath + ;; + esac + + # Cancel out all the simple stuff to save iterations. We also want + # the path to end with a slash for ease of parsing, so make sure + # there is one (and only one) here. + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_removedotparts" -e "$_G_collapseslashes" -e "$_G_finalslash"` + while :; do + # Processed it all yet? + if test / = "$func_normal_abspath_tpath"; then + # If we ascended to the root using ".." the result may be empty now. + if test -z "$func_normal_abspath_result"; then + func_normal_abspath_result=/ + fi + break + fi + func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_pathcar"` + func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \ + -e "$_G_pathcdr"` + # Figure out what to do with it + case $func_normal_abspath_tcomponent in + "") + # Trailing empty path component, ignore it. + ;; + ..) + # Parent dir; strip last assembled component from result. 
+ func_dirname "$func_normal_abspath_result" + func_normal_abspath_result=$func_dirname_result + ;; + *) + # Actual path component, append it. + func_append func_normal_abspath_result "/$func_normal_abspath_tcomponent" + ;; + esac + done + # Restore leading double-slash if one was found on entry. + func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result +} + + +# func_notquiet ARG... +# -------------------- +# Echo program name prefixed message only when not in quiet mode. +func_notquiet () +{ + $debug_cmd + + $opt_quiet || func_echo ${1+"$@"} + + # A bug in bash halts the script if the last line of a function + # fails when set -e is in force, so we need another command to + # work around that: + : +} + + +# func_relative_path SRCDIR DSTDIR +# -------------------------------- +# Set func_relative_path_result to the relative path from SRCDIR to DSTDIR. +func_relative_path () +{ + $debug_cmd + + func_relative_path_result= + func_normal_abspath "$1" + func_relative_path_tlibdir=$func_normal_abspath_result + func_normal_abspath "$2" + func_relative_path_tbindir=$func_normal_abspath_result + + # Ascend the tree starting from libdir + while :; do + # check if we have found a prefix of bindir + case $func_relative_path_tbindir in + $func_relative_path_tlibdir) + # found an exact match + func_relative_path_tcancelled= + break + ;; + $func_relative_path_tlibdir*) + # found a matching prefix + func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir" + func_relative_path_tcancelled=$func_stripname_result + if test -z "$func_relative_path_result"; then + func_relative_path_result=. + fi + break + ;; + *) + func_dirname $func_relative_path_tlibdir + func_relative_path_tlibdir=$func_dirname_result + if test -z "$func_relative_path_tlibdir"; then + # Have to descend all the way to the root! 
+ func_relative_path_result=../$func_relative_path_result + func_relative_path_tcancelled=$func_relative_path_tbindir + break + fi + func_relative_path_result=../$func_relative_path_result + ;; + esac + done + + # Now calculate path; take care to avoid doubling-up slashes. + func_stripname '' '/' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + func_stripname '/' '/' "$func_relative_path_tcancelled" + if test -n "$func_stripname_result"; then + func_append func_relative_path_result "/$func_stripname_result" + fi + + # Normalisation. If bindir is libdir, return '.' else relative path. + if test -n "$func_relative_path_result"; then + func_stripname './' '' "$func_relative_path_result" + func_relative_path_result=$func_stripname_result + fi + + test -n "$func_relative_path_result" || func_relative_path_result=. + + : +} + + +# func_quote_portable EVAL ARG +# ---------------------------- +# Internal function to portably implement func_quote_arg. Note that we still +# keep attention to performance here so we as much as possible try to avoid +# calling sed binary (so far O(N) complexity as long as func_append is O(1)). +func_quote_portable () +{ + $debug_cmd + + $require_check_ifs_backslash + + func_quote_portable_result=$2 + + # one-time-loop (easy break) + while true + do + if $1; then + func_quote_portable_result=`$ECHO "$2" | $SED \ + -e "$sed_double_quote_subst" -e "$sed_double_backslash"` + break + fi + + # Quote for eval. + case $func_quote_portable_result in + *[\\\`\"\$]*) + # Fallback to sed for $func_check_bs_ifs_broken=:, or when the string + # contains the shell wildcard characters. 
+ case $check_ifs_backshlash_broken$func_quote_portable_result in + :*|*[\[\*\?]*) + func_quote_portable_result=`$ECHO "$func_quote_portable_result" \ + | $SED "$sed_quote_subst"` + break + ;; + esac + + func_quote_portable_old_IFS=$IFS + for _G_char in '\' '`' '"' '$' + do + # STATE($1) PREV($2) SEPARATOR($3) + set start "" "" + func_quote_portable_result=dummy"$_G_char$func_quote_portable_result$_G_char"dummy + IFS=$_G_char + for _G_part in $func_quote_portable_result + do + case $1 in + quote) + func_append func_quote_portable_result "$3$2" + set quote "$_G_part" "\\$_G_char" + ;; + start) + set first "" "" + func_quote_portable_result= + ;; + first) + set quote "$_G_part" "" + ;; + esac + done + done + IFS=$func_quote_portable_old_IFS + ;; + *) ;; + esac + break + done + + func_quote_portable_unquoted_result=$func_quote_portable_result + case $func_quote_portable_result in + # double-quote args containing shell metacharacters to delay + # word splitting, command substitution and variable expansion + # for a subsequent eval. + # many bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + func_quote_portable_result=\"$func_quote_portable_result\" + ;; + esac +} + + +# func_quotefast_eval ARG +# ----------------------- +# Quote one ARG (internal). This is equivalent to 'func_quote_arg eval ARG', +# but optimized for speed. Result is stored in $func_quotefast_eval. +if test xyes = `(x=; printf -v x %q yes; echo x"$x") 2>/dev/null`; then + printf -v _GL_test_printf_tilde %q '~' + if test '\~' = "$_GL_test_printf_tilde"; then + func_quotefast_eval () + { + printf -v func_quotefast_eval_result %q "$1" + } + else + # Broken older Bash implementations. Make those faster too if possible. 
+ func_quotefast_eval () + { + case $1 in + '~'*) + func_quote_portable false "$1" + func_quotefast_eval_result=$func_quote_portable_result + ;; + *) + printf -v func_quotefast_eval_result %q "$1" + ;; + esac + } + fi +else + func_quotefast_eval () + { + func_quote_portable false "$1" + func_quotefast_eval_result=$func_quote_portable_result + } +fi + + +# func_quote_arg MODEs ARG +# ------------------------ +# Quote one ARG to be evaled later. MODEs argument may contain zero or more +# specifiers listed below separated by ',' character. This function returns two +# values: +# i) func_quote_arg_result +# double-quoted (when needed), suitable for a subsequent eval +# ii) func_quote_arg_unquoted_result +# has all characters that are still active within double +# quotes backslashified. Available only if 'unquoted' is specified. +# +# Available modes: +# ---------------- +# 'eval' (default) +# - escape shell special characters +# 'expand' +# - the same as 'eval'; but do not quote variable references +# 'pretty' +# - request aesthetic output, i.e. '"a b"' instead of 'a\ b'. This might +# be used later in func_quote to get output like: 'echo "a b"' instead +# of 'echo a\ b'. This is slower than default on some shells. +# 'unquoted' +# - produce also $func_quote_arg_unquoted_result which does not contain +# wrapping double-quotes. 
#
# Examples for 'func_quote_arg pretty,unquoted string':
#
#   string      | *_result              | *_unquoted_result
#   ------------+-----------------------+-------------------
#   "           | \"                    | \"
#   a b         | "a b"                 | a b
#   "a b"       | "\"a b\""             | \"a b\"
#   *           | "*"                   | *
#   z="${x-$y}" | "z=\"\${x-\$y}\""     | z=\"\${x-\$y}\"
#
# Examples for 'func_quote_arg pretty,unquoted,expand string':
#
#   string        |   *_result          |  *_unquoted_result
#   --------------+---------------------+--------------------
#   z="${x-$y}"   | "z=\"${x-$y}\""     | z=\"${x-$y}\"
func_quote_arg ()
{
    # Does the comma separated MODEs list ask for 'expand'?
    case ,$1, in
      *,expand,*) _G_quote_expand=: ;;
      *)          _G_quote_expand=false ;;
    esac

    case ,$1, in
      *,pretty,* | *,expand,* | *,unquoted,*)
        # Any of these modes needs the portable implementation, which
        # also delivers the variant without wrapping double quotes.
        func_quote_portable $_G_quote_expand "$2"
        func_quote_arg_result=$func_quote_portable_result
        func_quote_arg_unquoted_result=$func_quote_portable_unquoted_result
        ;;
      *)
        # Plain quote-for-eval: take the fast path, which is quicker
        # on some shells.
        func_quotefast_eval "$2"
        func_quote_arg_result=$func_quotefast_eval_result
        ;;
    esac
}


# func_quote MODEs ARGs...
# ------------------------
# Quote every ARG for a later eval and join the quoted words, separated
# by single spaces, into $func_quote_result.  MODEs has the meaning
# described for func_quote_arg.
func_quote ()
{
    $debug_cmd
    _G_func_quote_mode=$1 ; shift
    func_quote_result=
    for _G_func_quote_word
    do
      func_quote_arg "$_G_func_quote_mode" "$_G_func_quote_word"
      case $func_quote_result in
        '') func_append func_quote_result "$func_quote_arg_result" ;;
        *)  func_append func_quote_result " $func_quote_arg_result" ;;
      esac
    done
}


# func_stripname PREFIX SUFFIX NAME
# ---------------------------------
# strip PREFIX and SUFFIX from NAME, and store in func_stripname_result.
# PREFIX and SUFFIX must not contain globbing or regex special
# characters, hashes, percent signs, but SUFFIX may contain a leading
# dot (in which case that matches only a dot).
if test yes = "$_G_HAVE_XSI_OPS"; then
  # Fork-free variant; hidden in eval so that shells lacking these
  # expansions never get to parse them.
  eval 'func_stripname ()
  {
    $debug_cmd

    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are
    # positional parameters, so assign one to ordinary variable first.
    func_stripname_result=$3
    func_stripname_result=${func_stripname_result#"$1"}
    func_stripname_result=${func_stripname_result%"$2"}
  }'
else
  # Portable variant: let sed cut off both ends.  A SUFFIX starting
  # with a dot gets that dot escaped so it cannot match any character.
  func_stripname ()
  {
    $debug_cmd

    case $2 in
      .*) func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%\\\\$2\$%%"`;;
      *)  func_stripname_result=`$ECHO "$3" | $SED -e "s%^$1%%" -e "s%$2\$%%"`;;
    esac
  }
fi


# func_show_eval CMD [FAIL_EXP]
# -----------------------------
# Print CMD (unless opt_quiet is true), then evaluate it (unless
# opt_dry_run is true).  Should the evaluation fail, FAIL_EXP - which
# defaults to ':' - is evaluated with $? set to CMD's exit status.
func_show_eval ()
{
    $debug_cmd

    _G_cmd=$1
    _G_fail_exp=${2-':'}

    func_quote_arg pretty,expand "$_G_cmd"
    eval "func_notquiet $func_quote_arg_result"

    if $opt_dry_run; then
      :
    else
      eval "$_G_cmd"
      _G_status=$?
      if test 0 -ne "$_G_status"; then
        eval "(exit $_G_status); $_G_fail_exp"
      fi
    fi
}


# func_show_eval_locale CMD [FAIL_EXP]
# ------------------------------------
# Like func_show_eval, but CMD runs with the user's saved locale
# settings in effect; the safe locale is put back afterwards, before
# FAIL_EXP (default ':') gets evaluated for a failed CMD.
func_show_eval_locale ()
{
    $debug_cmd

    _G_cmd=$1
    _G_fail_exp=${2-':'}

    if $opt_quiet; then
      :
    else
      func_quote_arg expand,pretty "$_G_cmd"
      eval "func_echo $func_quote_arg_result"
    fi

    if $opt_dry_run; then
      :
    else
      eval "$_G_user_locale
	    $_G_cmd"
      _G_status=$?
      eval "$_G_safe_locale"
      if test 0 -ne "$_G_status"; then
        eval "(exit $_G_status); $_G_fail_exp"
      fi
    fi
}


# func_tr_sh
# ----------
# Turn $1 into a string suitable for a shell variable name.
# Result is stored in $func_tr_sh_result.
All characters +# not in the set a-zA-Z0-9_ are replaced with '_'. Further, +# if $1 begins with a digit, a '_' is prepended as well. +func_tr_sh () +{ + $debug_cmd + + case $1 in + [0-9]* | *[!a-zA-Z0-9_]*) + func_tr_sh_result=`$ECHO "$1" | $SED -e 's/^\([0-9]\)/_\1/' -e 's/[^a-zA-Z0-9_]/_/g'` + ;; + * ) + func_tr_sh_result=$1 + ;; + esac +} + + +# func_verbose ARG... +# ------------------- +# Echo program name prefixed message in verbose mode only. +func_verbose () +{ + $debug_cmd + + $opt_verbose && func_echo "$*" + + : +} + + +# func_warn_and_continue ARG... +# ----------------------------- +# Echo program name prefixed warning message to standard error. +func_warn_and_continue () +{ + $debug_cmd + + $require_term_colors + + func_echo_infix_1 "${tc_red}warning$tc_reset" "$*" >&2 +} + + +# func_warning CATEGORY ARG... +# ---------------------------- +# Echo program name prefixed warning message to standard error. Warning +# messages can be filtered according to CATEGORY, where this function +# elides messages where CATEGORY is not listed in the global variable +# 'opt_warning_types'. +func_warning () +{ + $debug_cmd + + # CATEGORY must be in the warning_categories list! + case " $warning_categories " in + *" $1 "*) ;; + *) func_internal_error "invalid warning category '$1'" ;; + esac + + _G_category=$1 + shift + + case " $opt_warning_types " in + *" $_G_category "*) $warning_func ${1+"$@"} ;; + esac +} + + +# func_sort_ver VER1 VER2 +# ----------------------- +# 'sort -V' is not generally available. +# Note this deviates from the version comparison in automake +# in that it treats 1.5 < 1.5.0, and treats 1.4.4a < 1.4-p3a +# but this should suffice as we won't be specifying old +# version formats or redundant trailing .0 in bootstrap.conf. +# If we did want full compatibility then we should probably +# use m4_version_compare from autoconf. +func_sort_ver () +{ + $debug_cmd + + printf '%s\n%s\n' "$1" "$2" \ + | sort -t. 
-k 1,1n -k 2,2n -k 3,3n -k 4,4n -k 5,5n -k 6,6n -k 7,7n -k 8,8n -k 9,9n +} + +# func_lt_ver PREV CURR +# --------------------- +# Return true if PREV and CURR are in the correct order according to +# func_sort_ver, otherwise false. Use it like this: +# +# func_lt_ver "$prev_ver" "$proposed_ver" || func_fatal_error "..." +func_lt_ver () +{ + $debug_cmd + + test "x$1" = x`func_sort_ver "$1" "$2" | $SED 1q` +} + + +# Local variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'before-save-hook 'time-stamp) +# time-stamp-pattern: "10/scriptversion=%:y-%02m-%02d.%02H; # UTC" +# time-stamp-time-zone: "UTC" +# End: +#! /bin/sh + +# A portable, pluggable option parser for Bourne shell. +# Written by Gary V. Vaughan, 2010 + +# This is free software. There is NO warranty; not even for +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Copyright (C) 2010-2019, 2021 Bootstrap Authors +# +# This file is dual licensed under the terms of the MIT license +# , and GPL version 2 or later +# . You must apply one of +# these licenses when using or redistributing this software or any of +# the files within it. See the URLs above, or the file `LICENSE` +# included in the Bootstrap distribution for the full license texts. + +# Please report bugs or propose patches to: +# + +# Set a version string for this script. +scriptversion=2019-02-19.15; # UTC + + +## ------ ## +## Usage. ## +## ------ ## + +# This file is a library for parsing options in your shell scripts along +# with assorted other useful supporting features that you can make use +# of too. +# +# For the simplest scripts you might need only: +# +# #!/bin/sh +# . relative/path/to/funclib.sh +# . relative/path/to/options-parser +# scriptversion=1.0 +# func_options ${1+"$@"} +# eval set dummy "$func_options_result"; shift +# ...rest of your script... 
+#
+# In order for the '--version' option to work, you will need to have a
+# suitably formatted comment like the one at the top of this file
+# starting with '# Written by ' and ending with '# Copyright'.
+#
+# For '-h' and '--help' to work, you will also need a one line
+# description of your script's purpose in a comment directly above the
+# '# Written by ' line, like the one at the top of this file.
+#
+# The default options also support '--debug', which will turn on shell
+# execution tracing (see the comment above debug_cmd below for another
+# use), and '--verbose' and the func_verbose function to allow your script
+# to display verbose messages only when your user has specified
+# '--verbose'.
+#
+# After sourcing this file, you can plug in processing for additional
+# options by amending the variables from the 'Configuration' section
+# below, and following the instructions in the 'Option parsing'
+# section further down.
+
+## -------------- ##
+## Configuration. ##
+## -------------- ##
+
+# You should override these variables in your script after sourcing this
+# file so that they reflect the customisations you have added to the
+# option parser.
+
+# The usage line for option parsing errors and the start of '-h' and
+# '--help' output messages. You can embed shell variables for delayed
+# expansion at the time the message is displayed, but you will need to
+# quote other shell meta-characters carefully to prevent them being
+# expanded when the contents are evaled.
+# Single-quoted on purpose: $progpath is expanded only when the message
+# is evaled for display.
+usage='$progpath [OPTION]...'
+
+# Short help message in response to '-h' and '--help'. Add to this or
+# override it after sourcing this library to reflect the full set of
+# options your script accepts.
+usage_message="\ + --debug enable verbose shell tracing + -W, --warnings=CATEGORY + report the warnings falling in CATEGORY [all] + -v, --verbose verbosely report processing + --version print version information and exit + -h, --help print short or long help message and exit +" + +# Additional text appended to 'usage_message' in response to '--help'. +long_help_message=" +Warning categories include: + 'all' show all warnings + 'none' turn off all the warnings + 'error' warnings are treated as fatal errors" + +# Help message printed before fatal option parsing errors. +fatal_help="Try '\$progname --help' for more information." + + + +## ------------------------- ## +## Hook function management. ## +## ------------------------- ## + +# This section contains functions for adding, removing, and running hooks +# in the main code. A hook is just a list of function names that can be +# run in order later on. + +# func_hookable FUNC_NAME +# ----------------------- +# Declare that FUNC_NAME will run hooks added with +# 'func_add_hook FUNC_NAME ...'. +func_hookable () +{ + $debug_cmd + + func_append hookable_fns " $1" +} + + +# func_add_hook FUNC_NAME HOOK_FUNC +# --------------------------------- +# Request that FUNC_NAME call HOOK_FUNC before it returns. FUNC_NAME must +# first have been declared "hookable" by a call to 'func_hookable'. +func_add_hook () +{ + $debug_cmd + + case " $hookable_fns " in + *" $1 "*) ;; + *) func_fatal_error "'$1' does not accept hook functions." ;; + esac + + eval func_append ${1}_hooks '" $2"' +} + + +# func_remove_hook FUNC_NAME HOOK_FUNC +# ------------------------------------ +# Remove HOOK_FUNC from the list of hook functions to be called by +# FUNC_NAME. 
+func_remove_hook ()
+{
+    $debug_cmd
+
+    # Rewrite <FUNC_NAME>_hooks with the first ' HOOK_FUNC' occurrence
+    # deleted by sed.
+    eval ${1}_hooks='`$ECHO "\$'$1'_hooks" |$SED "s| '$2'||"`'
+}
+
+
+# func_propagate_result FUNC_NAME_A FUNC_NAME_B
+# ---------------------------------------------
+# If the *_result variable of FUNC_NAME_A _is set_, assign its value to
+# *_result variable of FUNC_NAME_B.
+# $func_propagate_result_result is ':' when the value was copied and
+# 'false' otherwise, so callers can use it directly as a command.
+func_propagate_result ()
+{
+    $debug_cmd
+
+    func_propagate_result_result=:
+    # '${var+set}' distinguishes an unset variable from an empty one.
+    if eval "test \"\${${1}_result+set}\" = set"
+    then
+      eval "${2}_result=\$${1}_result"
+    else
+      func_propagate_result_result=false
+    fi
+}
+
+
+# func_run_hooks FUNC_NAME [ARG]...
+# ---------------------------------
+# Run all hook functions registered to FUNC_NAME.
+# It's assumed that the list of hook functions contains nothing more
+# than a whitespace-delimited list of legal shell function names, and
+# no effort is wasted trying to catch shell meta-characters or preserve
+# whitespace.
+func_run_hooks ()
+{
+    $debug_cmd
+
+    _G_rc_run_hooks=false
+
+    case " $hookable_fns " in
+      *" $1 "*) ;;
+      *) func_fatal_error "'$1' does not support hook functions." ;;
+    esac
+
+    eval _G_hook_fns=\$$1_hooks; shift
+
+    for _G_hook in $_G_hook_fns; do
+      # Clear any stale result so only a value set by this call of the
+      # hook is propagated.
+      func_unset "${_G_hook}_result"
+      eval $_G_hook '${1+"$@"}'
+      func_propagate_result $_G_hook func_run_hooks
+      if $func_propagate_result_result; then
+        # The hook returned a modified argument list: make it the
+        # positional parameters passed to the next hook.
+        eval set dummy "$func_run_hooks_result"; shift
+      fi
+    done
+}
+
+
+
+## --------------- ##
+## Option parsing. ##
+## --------------- ##
+
+# In order to add your own option parsing hooks, you must accept the
+# full positional parameter list from your hook function. You may remove
+# or edit any options that you action, and then pass back the remaining
+# unprocessed options in '<hooked_function_name>_result', escaped
+# suitably for 'eval'.
+#
+# The '<hooked_function_name>_result' variable is automatically unset
+# before your hook gets called; for best performance, only set the
+# *_result variable when necessary (i.e. don't call the 'func_quote'
+# function unnecessarily because it can be an expensive operation on some
+# machines).
+#
+# Like this:
+#
+#    my_options_prep ()
+#    {
+#        $debug_cmd
+#
+#        # Extend the existing usage message.
+#        usage_message=$usage_message'
+#      -s, --silent       don'\''t print informational messages
+#    '
+#        # No change in '$@' (ignored completely by this hook). Leave
+#        # my_options_prep_result variable intact.
+#    }
+#    func_add_hook func_options_prep my_options_prep
+#
+#
+#    my_silent_option ()
+#    {
+#        $debug_cmd
+#
+#        args_changed=false
+#
+#        # Note that, for efficiency, we parse as many options as we can
+#        # recognise in a loop before passing the remainder back to the
+#        # caller on the first unrecognised argument we encounter.
+#        while test $# -gt 0; do
+#          opt=$1; shift
+#          case $opt in
+#            --silent|-s) opt_silent=:
+#                         args_changed=:
+#                         ;;
+#            # Separate non-argument short options:
+#            -s*)         func_split_short_opt "$opt"
+#                         set dummy "$func_split_short_opt_name" \
+#                             "-$func_split_short_opt_arg" ${1+"$@"}
+#                         shift
+#                         args_changed=:
+#                         ;;
+#            *)           # Make sure the first unrecognised option "$opt"
+#                         # is added back to "$@" in case we need it later,
+#                         # if $args_changed was set to ':'.
+#                         set dummy "$opt" ${1+"$@"}; shift; break ;;
+#          esac
+#        done
+#
+#        # Only call 'func_quote' here if we processed at least one argument.
+#        if $args_changed; then
+#          func_quote eval ${1+"$@"}
+#          my_silent_option_result=$func_quote_result
+#        fi
+#    }
+#    func_add_hook func_parse_options my_silent_option
+#
+#
+#    my_option_validation ()
+#    {
+#        $debug_cmd
+#
+#        $opt_silent && $opt_verbose && func_fatal_help "\
+#    '--silent' and '--verbose' options are mutually exclusive."
+#    }
+#    func_add_hook func_validate_options my_option_validation
+#
+# You'll also need to manually amend $usage_message to reflect the extra
+# options you parse. It's preferable to append if you can, so that
+# multiple option parsing hooks can be added safely.
+
+
+# func_options_finish [ARG]...
+# ----------------------------
+# Finishing the option parse loop (call 'func_options' hooks ATM).
+func_options_finish ()
+{
+    $debug_cmd
+
+    # Run the hooks registered on 'func_options' itself, then hand any
+    # modified argument list back through func_options_finish_result.
+    func_run_hooks func_options ${1+"$@"}
+    func_propagate_result func_run_hooks func_options_finish
+}
+
+
+# func_options [ARG]...
+# ---------------------
+# All the functions called inside func_options are hookable. See the
+# individual implementations for details.
+# The remaining arguments, quoted for 'eval', are always left in
+# $func_options_result.
+func_hookable func_options
+func_options ()
+{
+    $debug_cmd
+
+    _G_options_quoted=false
+
+    # Run the four phases in order; each phase sees the positional
+    # parameters as rewritten by the phases before it.
+    for my_func in options_prep parse_options validate_options options_finish
+    do
+      func_unset func_${my_func}_result
+      func_unset func_run_hooks_result
+      eval func_$my_func '${1+"$@"}'
+      func_propagate_result func_$my_func func_options
+      if $func_propagate_result_result; then
+        eval set dummy "$func_options_result"; shift
+        _G_options_quoted=:
+      fi
+    done
+
+    $_G_options_quoted || {
+      # As we (func_options) are top-level options-parser function and
+      # nobody quoted "$@" for us yet, we need to do it explicitly for
+      # caller.
+      func_quote eval ${1+"$@"}
+      func_options_result=$func_quote_result
+    }
+}
+
+
+# func_options_prep [ARG]...
+# --------------------------
+# All initialisations required before starting the option parse loop.
+# Note that when calling hook functions, we pass through the list of
+# positional parameters. If a hook function modifies that list, and
+# needs to propagate that back to rest of this script, then the complete
+# modified list must be put in 'func_run_hooks_result' before returning.
+func_hookable func_options_prep
+func_options_prep ()
+{
+    $debug_cmd
+
+    # Option defaults:
+    opt_verbose=false
+    opt_warning_types=
+
+    func_run_hooks func_options_prep ${1+"$@"}
+    func_propagate_result func_run_hooks func_options_prep
+}
+
+
+# func_parse_options [ARG]...
+# ---------------------------
+# The main option parsing loop.
+func_hookable func_parse_options
+func_parse_options ()
+{
+    $debug_cmd
+
+    # Tracks whether "$@" was changed here and must be re-quoted into
+    # func_parse_options_result before returning.
+    _G_parse_options_requote=false
+    # this just eases exit handling
+    while test $# -gt 0; do
+      # Defer to hook functions for initial option parsing, so they
+      # get priority in the event of reusing an option name.
+      func_run_hooks func_parse_options ${1+"$@"}
+      func_propagate_result func_run_hooks func_parse_options
+      if $func_propagate_result_result; then
+        eval set dummy "$func_parse_options_result"; shift
+        # Even though we may have changed "$@", we passed the "$@" array
+        # down into the hook and it quoted it for us (because we are in
+        # this if-branch). No need to quote it again.
+        _G_parse_options_requote=false
+      fi
+
+      # Break out of the loop if we already parsed every option.
+      test $# -gt 0 || break
+
+      # We expect that one of the options parsed in this function matches
+      # and thus we remove _G_opt from "$@" and need to re-quote.
+      _G_match_parse_options=:
+      _G_opt=$1
+      shift
+      case $_G_opt in
+        --debug|-x)   debug_cmd='set -x'
+                      func_echo "enabling shell trace mode" >&2
+                      $debug_cmd
+                      ;;
+
+        # Rewritten to '--warnings none' and handled on the next iteration.
+        --no-warnings|--no-warning|--no-warn)
+                      set dummy --warnings none ${1+"$@"}
+                      shift
+                      ;;
+
+        --warnings|--warning|-W)
+                      if test $# = 0 && func_missing_arg $_G_opt; then
+                        _G_parse_options_requote=:
+                        break
+                      fi
+                      case " $warning_categories $1" in
+                        *" $1 "*)
+                          # trailing space prevents matching last $1 above
+                          func_append_uniq opt_warning_types " $1"
+                          ;;
+                        *all)
+                          opt_warning_types=$warning_categories
+                          ;;
+                        *none)
+                          opt_warning_types=none
+                          warning_func=:
+                          ;;
+                        *error)
+                          opt_warning_types=$warning_categories
+                          warning_func=func_fatal_error
+                          ;;
+                        *)
+                          func_fatal_error \
+                             "unsupported warning category: '$1'"
+                          ;;
+                      esac
+                      shift
+                      ;;
+
+        --verbose|-v) opt_verbose=: ;;
+        --version)    func_version ;;
+        -\?|-h)       func_usage ;;
+        --help)       func_help ;;
+
+        # Separate optargs to long options (plugins may need this):
+        --*=*)        func_split_equals "$_G_opt"
+                      set dummy "$func_split_equals_lhs" \
+                          "$func_split_equals_rhs" ${1+"$@"}
+                      shift
+                      ;;
+
+        # Separate optargs to short options:
+        -W*)
+                      func_split_short_opt "$_G_opt"
+                      set dummy "$func_split_short_opt_name" \
+                          "$func_split_short_opt_arg" ${1+"$@"}
+                      shift
+                      ;;
+
+        # Separate non-argument short options:
+        -\?*|-h*|-v*|-x*)
+                      func_split_short_opt "$_G_opt"
+                      set dummy "$func_split_short_opt_name" \
+                          "-$func_split_short_opt_arg" ${1+"$@"}
+                      shift
+                      ;;
+
+        # '--' ends option parsing; it is consumed, so re-quote.
+        --)           _G_parse_options_requote=: ; break ;;
+        -*)           func_fatal_help "unrecognised option: '$_G_opt'" ;;
+        # First non-option argument: put it back and stop.
+        *)            set dummy "$_G_opt" ${1+"$@"}; shift
+                      _G_match_parse_options=false
+                      break
+                      ;;
+      esac
+
+      if $_G_match_parse_options; then
+        _G_parse_options_requote=:
+      fi
+    done
+
+    if $_G_parse_options_requote; then
+      # save modified positional parameters for caller
+      func_quote eval ${1+"$@"}
+      func_parse_options_result=$func_quote_result
+    fi
+}
+
+
+# func_validate_options [ARG]...
+# ------------------------------
+# Perform any sanity checks on option settings and/or unconsumed
+# arguments.
+func_hookable func_validate_options
+func_validate_options ()
+{
+    $debug_cmd
+
+    # Display all warnings if -W was not given.
+    test -n "$opt_warning_types" || opt_warning_types=" $warning_categories"
+
+    func_run_hooks func_validate_options ${1+"$@"}
+    func_propagate_result func_run_hooks func_validate_options
+
+    # Bail if the options were screwed!
+    # func_missing_arg sets exit_cmd=exit; presumably exit_cmd is a no-op
+    # otherwise (its default is not visible here).
+    $exit_cmd $EXIT_FAILURE
+}
+
+
+
+## ----------------- ##
+## Helper functions. ##
+## ----------------- ##
+
+# This section contains the helper functions used by the rest of the
+# hookable option parser framework in ascii-betical order.
+
+
+# func_fatal_help ARG...
+# ----------------------
+# Echo program name prefixed message to standard error, followed by
+# a help hint, and exit.
+func_fatal_help ()
+{
+    $debug_cmd
+
+    # eval expands the variables embedded in $usage and $fatal_help now.
+    eval \$ECHO \""Usage: $usage"\"
+    eval \$ECHO \""$fatal_help"\"
+    func_error ${1+"$@"}
+    exit $EXIT_FAILURE
+}
+
+
+# func_help
+# ---------
+# Echo long help message to standard output and exit.
+func_help ()
+{
+    $debug_cmd
+
+    func_usage_message
+    $ECHO "$long_help_message"
+    exit 0
+}
+
+
+# func_missing_arg ARGNAME
+# ------------------------
+# Echo program name prefixed message to standard error and set global
+# exit_cmd.
+func_missing_arg ()
+{
+    $debug_cmd
+
+    func_error "Missing argument for '$1'."
+    exit_cmd=exit
+}
+
+
+# func_split_equals STRING
+# ------------------------
+# Set func_split_equals_lhs and func_split_equals_rhs shell variables
+# after splitting STRING at the '=' sign.
+# Probe for XSI parameter expansion, in a subshell, only when
+# _G_HAVE_XSI_OPS has no value yet.
+test -z "$_G_HAVE_XSI_OPS" \
+    && (eval 'x=a/b/c;
+      test 5aa/bb/cc = "${#x}${x%%/*}${x%/*}${x#*/}${x##*/}"') 2>/dev/null \
+    && _G_HAVE_XSI_OPS=yes
+
+if test yes = "$_G_HAVE_XSI_OPS"
+then
+  # This is an XSI compatible shell, allowing a faster implementation...
+  eval 'func_split_equals ()
+  {
+      $debug_cmd
+
+      func_split_equals_lhs=${1%%=*}
+      func_split_equals_rhs=${1#*=}
+      if test "x$func_split_equals_lhs" = "x$1"; then
+        func_split_equals_rhs=
+      fi
+  }'
+else
+  # ...otherwise fall back to using expr, which is often a shell builtin.
+  func_split_equals ()
+  {
+      $debug_cmd
+
+      func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'`
+      func_split_equals_rhs=
+      test "x$func_split_equals_lhs=" = "x$1" \
+        || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'`
+  }
+fi #func_split_equals
+
+
+# func_split_short_opt SHORTOPT
+# -----------------------------
+# Set func_split_short_opt_name and func_split_short_opt_arg shell
+# variables after splitting SHORTOPT after the 2nd character.
+if test yes = "$_G_HAVE_XSI_OPS"
+then
+  # This is an XSI compatible shell, allowing a faster implementation...
+  eval 'func_split_short_opt ()
+  {
+      $debug_cmd
+
+      func_split_short_opt_arg=${1#??}
+      func_split_short_opt_name=${1%"$func_split_short_opt_arg"}
+  }'
+else
+  # ...otherwise fall back to using expr, which is often a shell builtin.
+  func_split_short_opt ()
+  {
+      $debug_cmd
+
+      func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'`
+      func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'`
+  }
+fi #func_split_short_opt
+
+
+# func_usage
+# ----------
+# Echo short help message to standard output and exit.
+func_usage ()
+{
+    $debug_cmd
+
+    func_usage_message
+    $ECHO "Run '$progname --help |${PAGER-more}' for full usage"
+    exit 0
+}
+
+
+# func_usage_message
+# ------------------
+# Echo short help message to standard output.
+func_usage_message ()
+{
+    $debug_cmd
+
+    eval \$ECHO \""Usage: $usage"\"
+    echo
+    # Print the line of $progpath that directly precedes '# Written by'
+    # (the one-line description), with its leading '# ' removed.
+    $SED -n 's|^# ||
+        /^Written by/{
+          x;p;x
+        }
+        h
+        /^Written by/q' < "$progpath"
+    echo
+    eval \$ECHO \""$usage_message"\"
+}
+
+
+# func_version
+# ------------
+# Echo version message to standard output and exit.
+# The version message is extracted from the calling file's header
+# comments, with leading '# ' stripped:
+#   1. First display the progname and version
+#   2. Followed by the header comment line matching /^# Written by /
+#   3. Then a blank line followed by the first following line matching
+#      /^# Copyright /
+#   4. Immediately followed by any lines between the previous matches,
+#      except lines preceding the intervening completely blank line.
+# For example, see the header comments of this file.
+func_version ()
+{
+    $debug_cmd
+
+    printf '%s\n' "$progname $scriptversion"
+    $SED -n '
+        /^# Written by /!b
+        s|^# ||; p; n
+
+        :fwd2blnk
+        /./ {
+          n
+          b fwd2blnk
+        }
+        p; n
+
+        :holdwrnt
+        s|^# ||
+        s|^# *$||
+        /^Copyright /!{
+          /./H
+          n
+          b holdwrnt
+        }
+
+        s|\((C)\)[ 0-9,-]*[ ,-]\([1-9][0-9]* \)|\1 \2|
+        G
+        s|\(\n\)\n*|\1|g
+        p; q' < "$progpath"
+
+    # Exit with the status of the sed command above.
+    exit $?
+}
+
+
+# Local variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'before-save-hook 'time-stamp)
+# time-stamp-pattern: "30/scriptversion=%:y-%02m-%02d.%02H; # UTC"
+# time-stamp-time-zone: "UTC"
+# End:
+
+# Set a version string.
+scriptversion='(GNU libtool) 2.4.7'
+
+
+# func_echo ARG...
+# ----------------
+# Libtool also displays the current mode in messages, so override
+# funclib.sh func_echo with this custom definition.
+func_echo ()
+{
+    $debug_cmd
+
+    _G_message=$*
+
+    # Split the message on $nl so every line gets its own prefix; IFS is
+    # put back inside the loop and again after it.
+    func_echo_IFS=$IFS
+    IFS=$nl
+    for _G_line in $_G_message; do
+      IFS=$func_echo_IFS
+      $ECHO "$progname${opt_mode+: $opt_mode}: $_G_line"
+    done
+    IFS=$func_echo_IFS
+}
+
+
+# func_warning ARG...
+# -------------------
+# Libtool warnings are not categorized, so override funclib.sh
+# func_warning with this simpler definition.
+func_warning ()
+{
+    $debug_cmd
+
+    $warning_func ${1+"$@"}
+}
+
+
+## ---------------- ##
+## Options parsing. ##
+## ---------------- ##
+
+# Hook in the functions to make sure our own options are parsed during
+# the option parsing loop.
+
+usage='$progpath [OPTION]... [MODE-ARG]...'
+
+# Short help message in response to '-h'.
+usage_message="Options:
+ --config show all configuration variables
+ --debug enable verbose shell tracing
+ -n, --dry-run display commands without modifying any files
+ --features display basic configuration information and exit
+ --mode=MODE use operation mode MODE
+ --no-warnings equivalent to '-Wnone'
+ --preserve-dup-deps don't remove duplicate dependency libraries
+ --quiet, --silent don't print informational messages
+ --tag=TAG use configuration variables from tag TAG
+ -v, --verbose print more informational messages than default
+ --version print version information
+ -W, --warnings=CATEGORY report the warnings falling in CATEGORY [all]
+ -h, --help, --help-all print short, long, or detailed help message
+"
+
+# Additional text appended to 'usage_message' in response to '--help'.
+func_help ()
+{
+    $debug_cmd
+
+    func_usage_message
+    $ECHO "$long_help_message
+
+MODE must be one of the following:
+
+ clean remove files from the build directory
+ compile compile a source file into a libtool object
+ execute automatically set library path, then run a program
+ finish complete the installation of libtool libraries
+ install install libraries or executables
+ link create a library or an executable
+ uninstall remove libraries from an installed directory
+
+MODE-ARGS vary depending on the MODE. When passed as first option,
+'--mode=MODE' may be abbreviated as 'MODE' or a unique abbreviation of that.
+Try '$progname --help --mode=MODE' for a more detailed description of MODE.
+
+When reporting a bug, please describe a test case to reproduce it and
+include the following information:
+
+ host-triplet: $host
+ shell: $SHELL
+ compiler: $LTCC
+ compiler flags: $LTCFLAGS
+ linker: $LD (gnu? $with_gnu_ld)
+ version: $progname $scriptversion Debian-2.4.7-7build1
+ automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q`
+ autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q`
+
+Report bugs to <bug-libtool@gnu.org>.
+GNU libtool home page: <http://www.gnu.org/software/libtool/>.
+General help using GNU software: <http://www.gnu.org/gethelp/>."
+    exit 0
+}
+
+
+# func_lo2o OBJECT-NAME
+# ---------------------
+# Transform OBJECT-NAME from a '.lo' suffix to the platform specific
+# object suffix.
+
+# sed programs for the two suffix conversions.
+lo2o=s/\\.lo\$/.$objext/
+o2lo=s/\\.$objext\$/.lo/
+
+if test yes = "$_G_HAVE_XSI_OPS"; then
+  eval 'func_lo2o ()
+  {
+    case $1 in
+      *.lo) func_lo2o_result=${1%.lo}.$objext ;;
+      *   ) func_lo2o_result=$1               ;;
+    esac
+  }'
+
+  # func_xform LIBOBJ-OR-SOURCE
+  # ---------------------------
+  # Transform LIBOBJ-OR-SOURCE from a '.o' or '.c' (or otherwise)
+  # suffix to a '.lo' libtool-object suffix.
+  eval 'func_xform ()
+  {
+    func_xform_result=${1%.*}.lo
+  }'
+else
+  # ...otherwise fall back to using sed.
+  func_lo2o ()
+  {
+    func_lo2o_result=`$ECHO "$1" | $SED "$lo2o"`
+  }
+
+  func_xform ()
+  {
+    func_xform_result=`$ECHO "$1" | $SED 's|\.[^.]*$|.lo|'`
+  }
+fi
+
+
+# func_fatal_configuration ARG...
+# -------------------------------
+# Echo program name prefixed message to standard error, followed by
+# a configuration failure hint, and exit.
+func_fatal_configuration ()
+{
+    func_fatal_error ${1+"$@"} \
+      "See the $PACKAGE documentation for more information." \
+      "Fatal configuration error."
+}
+
+
+# func_config
+# -----------
+# Display the configuration for all the tags in this script.
+func_config ()
+{
+    re_begincf='^# ### BEGIN LIBTOOL'
+    re_endcf='^# ### END LIBTOOL'
+
+    # Default configuration.
+    $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath"
+
+    # Now print the configurations for the tags.
+    for tagname in $taglist; do
+      $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath"
+    done
+
+    exit $?
+}
+
+
+# func_features
+# -------------
+# Display the features supported by this script.
+func_features ()
+{
+    echo "host: $host"
+    if test yes = "$build_libtool_libs"; then
+      echo "enable shared libraries"
+    else
+      echo "disable shared libraries"
+    fi
+    if test yes = "$build_old_libs"; then
+      echo "enable static libraries"
+    else
+      echo "disable static libraries"
+    fi
+
+    exit $?
+}
+
+
+# func_enable_tag TAGNAME
+# -----------------------
+# Verify that TAGNAME is valid, and either flag an error and exit, or
+# enable the TAGNAME tag. We also add TAGNAME to the global $taglist
+# variable here.
+func_enable_tag ()
+{
+    # Global variable:
+    tagname=$1
+
+    re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$"
+    re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$"
+    sed_extractcf=/$re_begincf/,/$re_endcf/p
+
+    # Validate tagname.
+    case $tagname in
+      *[!-_A-Za-z0-9,/]*)
+        func_fatal_error "invalid tag name: $tagname"
+        ;;
+    esac
+
+    # Don't test for the "default" C tag, as we know it's
+    # there but not specially marked.
+    case $tagname in
+        CC) ;;
+    *)
+        if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then
+          taglist="$taglist $tagname"
+
+          # Evaluate the configuration. Be careful to quote the path
+          # and the sed script, to avoid splitting on whitespace, but
+          # also don't use non-portable quotes within backquotes within
+          # quotes we have to do it in 2 steps:
+          extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"`
+          eval "$extractedcf"
+        else
+          func_error "ignoring unknown tag $tagname"
+        fi
+        ;;
+    esac
+}
+
+
+# func_check_version_match
+# ------------------------
+# Ensure that we are using m4 macros, and libtool script from the same
+# release of libtool.
+# On a mismatch, print one of three diagnostics to standard error and
+# exit with $EXIT_MISMATCH.
+func_check_version_match ()
+{
+    if test "$package_revision" != "$macro_revision"; then
+      if test "$VERSION" != "$macro_version"; then
+        if test -z "$macro_version"; then
+          cat >&2 <<_LT_EOF
+$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
+$progname: definition of this LT_INIT comes from an older release.
+$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
+$progname: and run autoconf again.
+_LT_EOF
+        else
+          cat >&2 <<_LT_EOF
+$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
+$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
+$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
+$progname: and run autoconf again.
+_LT_EOF
+        fi
+      else
+        cat >&2 <<_LT_EOF
+$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision,
+$progname: but the definition of this LT_INIT comes from revision $macro_revision.
+$progname: You should recreate aclocal.m4 with macros from revision $package_revision
+$progname: of $PACKAGE $VERSION and run autoconf again.
+_LT_EOF
+      fi
+
+      exit $EXIT_MISMATCH
+    fi
+}
+
+
+# libtool_options_prep [ARG]...
+# -----------------------------
+# Preparation for options parsed by libtool.
+# Resets the libtool option defaults and, when the first argument is a
+# mode name or one of its abbreviations, rewrites it to '--mode MODE'.
+# libtool_options_prep_result is set only when that rewrite happened.
+libtool_options_prep ()
+{
+    # Every other function here enables tracing through $debug_cmd; the
+    # '$debug_mode' previously used at this point is not set anywhere
+    # else in this section, so tracing skipped this function.
+    $debug_cmd
+
+    # Option defaults:
+    opt_config=false
+    opt_dlopen=
+    opt_dry_run=false
+    opt_help=false
+    opt_mode=
+    opt_preserve_dup_deps=false
+    opt_quiet=false
+
+    nonopt=
+    preserve_args=
+
+    # Assume the argument list will be rewritten; the default case arm
+    # below clears the flag.
+    _G_rc_lt_options_prep=:
+
+    # Shorthand for --mode=foo, only valid as the first argument
+    case $1 in
+    clean|clea|cle|cl)
+      shift; set dummy --mode clean ${1+"$@"}; shift
+      ;;
+    compile|compil|compi|comp|com|co|c)
+      shift; set dummy --mode compile ${1+"$@"}; shift
+      ;;
+    execute|execut|execu|exec|exe|ex|e)
+      shift; set dummy --mode execute ${1+"$@"}; shift
+      ;;
+    finish|finis|fini|fin|fi|f)
+      shift; set dummy --mode finish ${1+"$@"}; shift
+      ;;
+    install|instal|insta|inst|ins|in|i)
+      shift; set dummy --mode install ${1+"$@"}; shift
+      ;;
+    link|lin|li|l)
+      shift; set dummy --mode link ${1+"$@"}; shift
+      ;;
+    uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
+      shift; set dummy --mode uninstall ${1+"$@"}; shift
+      ;;
+    *)
+      _G_rc_lt_options_prep=false
+      ;;
+    esac
+
+    if $_G_rc_lt_options_prep; then
+      # Pass back the list of options.
+      func_quote eval ${1+"$@"}
+      libtool_options_prep_result=$func_quote_result
+    fi
+}
+func_add_hook func_options_prep libtool_options_prep
+
+
+# libtool_parse_options [ARG]...
+# ---------------------------------
+# Provide handling for libtool specific options.
+# libtool_parse_options_result is set only when at least one option was
+# consumed here.
+libtool_parse_options ()
+{
+    $debug_cmd
+
+    _G_rc_lt_parse_options=false
+
+    # Perform our own loop to consume as many options as possible in
+    # each iteration.
+    while test $# -gt 0; do
+      _G_match_lt_parse_options=:
+      _G_opt=$1
+      shift
+      case $_G_opt in
+        --dry-run|--dryrun|-n)
+                        opt_dry_run=:
+                        ;;
+
+        --config)       func_config ;;
+
+        # Multiple -dlopen arguments accumulate, separated by newlines.
+        --dlopen|-dlopen)
+                        opt_dlopen="${opt_dlopen+$opt_dlopen
+}$1"
+                        shift
+                        ;;
+
+        --preserve-dup-deps)
+                        opt_preserve_dup_deps=: ;;
+
+        --features)     func_features ;;
+
+        --finish)       set dummy --mode finish ${1+"$@"}; shift ;;
+
+        --help)         opt_help=: ;;
+
+        --help-all)     opt_help=': help-all' ;;
+
+        --mode)         test $# = 0 && func_missing_arg $_G_opt && break
+                        opt_mode=$1
+                        case $1 in
+                          # Valid mode arguments:
+                          clean|compile|execute|finish|install|link|relink|uninstall) ;;
+
+                          # Catch anything else as an error
+                          *) func_error "invalid argument for $_G_opt"
+                             exit_cmd=exit
+                             break
+                             ;;
+                        esac
+                        shift
+                        ;;
+
+        --no-silent|--no-quiet)
+                        opt_quiet=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --no-warnings|--no-warning|--no-warn)
+                        opt_warning=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --no-verbose)
+                        opt_verbose=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --silent|--quiet)
+                        opt_quiet=:
+                        opt_verbose=false
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        --tag)          test $# = 0 && func_missing_arg $_G_opt && break
+                        opt_tag=$1
+                        func_append preserve_args " $_G_opt $1"
+                        func_enable_tag "$1"
+                        shift
+                        ;;
+
+        --verbose|-v)   opt_quiet=false
+                        opt_verbose=:
+                        func_append preserve_args " $_G_opt"
+                        ;;
+
+        # An option not handled by this hook function:
+        *)              set dummy "$_G_opt" ${1+"$@"} ; shift
+                        _G_match_lt_parse_options=false
+                        break
+                        ;;
+      esac
+      $_G_match_lt_parse_options && _G_rc_lt_parse_options=:
+    done
+
+    if $_G_rc_lt_parse_options; then
+      # save modified positional parameters for caller
+      func_quote eval ${1+"$@"}
+      libtool_parse_options_result=$func_quote_result
+    fi
+}
+func_add_hook func_parse_options libtool_parse_options
+
+
+
+# libtool_validate_options [ARG]...
+# ---------------------------------
+# Perform any sanity checks on option settings and/or unconsumed
+# arguments.
+libtool_validate_options ()
+{
+    # save first non-option argument
+    if test 0 -lt $#; then
+      nonopt=$1
+      shift
+    fi
+
+    # preserve --debug
+    test : = "$debug_cmd" || func_append preserve_args " --debug"
+
+    case $host in
+      # Solaris2 added to fix http://debbugs.gnu.org/cgi/bugreport.cgi?bug=16452
+      # see also: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59788
+      *cygwin* | *mingw* | *pw32* | *cegcc* | *solaris2* | *os2*)
+        # don't eliminate duplications in $postdeps and $predeps
+        opt_duplicate_compiler_generated_deps=:
+        ;;
+      *)
+        opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
+        ;;
+    esac
+
+    # The sanity checks are skipped when help was requested.
+    $opt_help || {
+      # Sanity checks first:
+      func_check_version_match
+
+      test yes != "$build_libtool_libs" \
+        && test yes != "$build_old_libs" \
+        && func_fatal_configuration "not configured to build any kind of library"
+
+      # Expand $shrext_cmds once into std_shrext (the original note here
+      # singles out Darwin as the reason).
+      eval std_shrext=\"$shrext_cmds\"
+
+      # Only execute mode is allowed to have -dlopen flags.
+      if test -n "$opt_dlopen" && test execute != "$opt_mode"; then
+        func_error "unrecognized option '-dlopen'"
+        $ECHO "$help" 1>&2
+        exit $EXIT_FAILURE
+      fi
+
+      # Change the help message to a mode-specific one.
+      generic_help=$help
+      help="Try '$progname --help --mode=$opt_mode' for more information."
+    }
+
+    # Pass back the unparsed argument list
+    func_quote eval ${1+"$@"}
+    libtool_validate_options_result=$func_quote_result
+}
+func_add_hook func_validate_options libtool_validate_options
+
+
+# Process options as early as possible so that --help and --version
+# can return quickly.
+func_options ${1+"$@"}
+# Replace the positional parameters with what option parsing left over.
+eval set dummy "$func_options_result"; shift
+
+
+
+## ----------- ##
+##    Main.    ##
+## ----------- ##
+
+# Marker strings; func_ltwrapper_executable_p greps for $magic_exe.
+magic='%%%MAGIC variable%%%'
+magic_exe='%%%MAGIC EXE variable%%%'
+
+# Global variables.
+extracted_archives=
+extracted_serial=0
+
+# If this variable is set in any of the actions, the command in it
+# will be execed at the end. This prevents here-documents from being
+# left over by shells.
exec_cmd=


# func_fallback_echo string
# A function that is used when there is no print builtin or printf.
# Writes STRING followed by a newline by feeding it to cat through a
# here-document.
func_fallback_echo ()
{
  eval 'cat <<_LTECHO_EOF
$1
_LTECHO_EOF'
}

# func_generated_by_libtool_p
# True iff stdin has been generated by Libtool, i.e. it contains a line
# starting with '# Generated by ' that also mentions $PACKAGE.  This
# function is only a basic sanity check; it will hardly flush out
# determined imposters.
func_generated_by_libtool_p ()
{
  $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
}

# func_lalib_p file
# True iff FILE is a libtool '.la' library or '.lo' object file.
# Only the first four lines of FILE are inspected.
# This function is only a basic sanity check; it will hardly flush out
# determined imposters.
func_lalib_p ()
{
    test -f "$1" &&
      $SED -e 4q "$1" 2>/dev/null | func_generated_by_libtool_p
}

# func_lalib_unsafe_p file
# True iff FILE is a libtool '.la' library or '.lo' object file.
# This function implements the same check as func_lalib_p without
# resorting to external programs.  To this end, it temporarily points
# stdin at FILE, parking the caller's stdin on file descriptor 5 and
# restoring it afterwards; whatever was open on descriptor 5 is lost.
# As a safety measure, use it only where a negative result would be
# fatal anyway.  Works if 'file' does not exist.
func_lalib_unsafe_p ()
{
    lalib_p=no
    if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then
        # Look at no more than the first four lines of FILE.
        for lalib_p_l in 1 2 3 4
        do
            read lalib_p_line
            case $lalib_p_line in
                \#\ Generated\ by\ *$PACKAGE* ) lalib_p=yes; break;;
            esac
        done
        # Put the original stdin back and release descriptor 5.
        exec 0<&5 5<&-
    fi
    test yes = "$lalib_p"
}

# func_ltwrapper_script_p file
# True iff FILE is a libtool wrapper script.  The leading part of FILE,
# as cut out by $lt_truncate_bin, is checked with
# func_generated_by_libtool_p.
# This function is only a basic sanity check; it will hardly flush out
# determined imposters.
func_ltwrapper_script_p ()
{
    test -f "$1" &&
      $lt_truncate_bin < "$1" 2>/dev/null | func_generated_by_libtool_p
}

# func_ltwrapper_executable_p file
# True iff FILE is a libtool wrapper executable
# This function is only a basic sanity check; it will hardly flush out
# determined imposters.
func_ltwrapper_executable_p ()
{
    # Append '.exe' to the name unless FILE already ends in it, then
    # search that file for the $magic_exe marker string.
    func_ltwrapper_exec_suffix=
    case $1 in
    *.exe) ;;
    *) func_ltwrapper_exec_suffix=.exe ;;
    esac
    $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
}

# func_ltwrapper_scriptname file
# Assumes FILE is an ltwrapper_executable.
# Uses FILE to determine the appropriate filename for a temporary
# ltwrapper_script: DIR/$objdir/BASENAME_ltshwrapper, where a trailing
# '.exe' is dropped from BASENAME first.  The name is stored in
# func_ltwrapper_scriptname_result.
func_ltwrapper_scriptname ()
{
    func_dirname_and_basename "$1" "" "."
    func_stripname '' '.exe' "$func_basename_result"
    func_ltwrapper_scriptname_result=$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper
}

# func_ltwrapper_p file
# True iff FILE is a libtool wrapper script or wrapper executable
# This function is only a basic sanity check; it will hardly flush out
# determined imposters.
func_ltwrapper_p ()
{
    func_ltwrapper_script_p "$1" || func_ltwrapper_executable_p "$1"
}


# func_execute_cmds commands fail_cmd
# Execute tilde-delimited COMMANDS.
# If FAIL_CMD is given, eval that upon failure; it defaults to ':'.
# FAIL_CMD may read-access the current command in variable CMD!
func_execute_cmds ()
{
    $debug_cmd

    # Split COMMANDS on '~' only.
    save_ifs=$IFS; IFS='~'
    for cmd in $1; do
      # Expand the command text with IFS set to $sp$nl, then restore the
      # caller's IFS before the command is actually run.
      IFS=$sp$nl
      eval cmd=\"$cmd\"
      IFS=$save_ifs
      func_show_eval "$cmd" "${2-:}"
    done
    # Covers the case where the loop body never ran.
    IFS=$save_ifs
}


# func_source file
# Source FILE, adding directory component if necessary.
# Note that it is not necessary on cygwin/mingw to append a dot to
# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
# behavior happens only for exec(3), not for open(2)!  Also, sourcing
# 'FILE.' does not work on cygwin managed mounts.
func_source ()
{
    $debug_cmd

    case $1 in
    # A name that already contains a slash or backslash is used as given.
    */* | *\\*) . "$1" ;;
    # A bare name gets './' prepended.
    *) . "./$1" ;;
    esac
}


# func_resolve_sysroot PATH
# Replace a leading = in PATH with a sysroot.
Store the result into +# func_resolve_sysroot_result +func_resolve_sysroot () +{ + func_resolve_sysroot_result=$1 + case $func_resolve_sysroot_result in + =*) + func_stripname '=' '' "$func_resolve_sysroot_result" + func_resolve_sysroot_result=$lt_sysroot$func_stripname_result + ;; + esac +} + +# func_replace_sysroot PATH +# If PATH begins with the sysroot, replace it with = and +# store the result into func_replace_sysroot_result. +func_replace_sysroot () +{ + case $lt_sysroot:$1 in + ?*:"$lt_sysroot"*) + func_stripname "$lt_sysroot" '' "$1" + func_replace_sysroot_result='='$func_stripname_result + ;; + *) + # Including no sysroot. + func_replace_sysroot_result=$1 + ;; + esac +} + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' +func_infer_tag () +{ + $debug_cmd + + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. 
+ eval "`$SED -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + func_append_quoted CC_quoted "$arg" + done + CC_expanded=`func_echo_all $CC` + CC_quoted_expanded=`func_echo_all $CC_quoted` + case "$@ " in + " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \ + " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. + if test -z "$tagname"; then + func_echo "unable to infer tagged configuration" + func_fatal_error "specify a tag with '--tag'" +# else +# func_verbose "using $tagname tagged configuration" + fi + ;; + esac + fi +} + + + +# func_write_libtool_object output_name pic_name nonpic_name +# Create a libtool object file (analogous to a ".la" file), +# but don't create it if we're doing a dry run. +func_write_libtool_object () +{ + write_libobj=$1 + if test yes = "$build_libtool_libs"; then + write_lobj=\'$2\' + else + write_lobj=none + fi + + if test yes = "$build_old_libs"; then + write_oldobj=\'$3\' + else + write_oldobj=none + fi + + $opt_dry_run || { + cat >${write_libobj}T </dev/null` + if test "$?" 
-eq 0 && test -n "$func_convert_core_file_wine_to_w32_tmp"; then + func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" | + $SED -e "$sed_naive_backslashify"` + else + func_convert_core_file_wine_to_w32_result= + fi + fi +} +# end: func_convert_core_file_wine_to_w32 + + +# func_convert_core_path_wine_to_w32 ARG +# Helper function used by path conversion functions when $build is *nix, and +# $host is mingw, cygwin, or some other w32 environment. Relies on a correctly +# configured wine environment available, with the winepath program in $build's +# $PATH. Assumes ARG has no leading or trailing path separator characters. +# +# ARG is path to be converted from $build format to win32. +# Result is available in $func_convert_core_path_wine_to_w32_result. +# Unconvertible file (directory) names in ARG are skipped; if no directory names +# are convertible, then the result may be empty. +func_convert_core_path_wine_to_w32 () +{ + $debug_cmd + + # unfortunately, winepath doesn't convert paths, only file names + func_convert_core_path_wine_to_w32_result= + if test -n "$1"; then + oldIFS=$IFS + IFS=: + for func_convert_core_path_wine_to_w32_f in $1; do + IFS=$oldIFS + func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f" + if test -n "$func_convert_core_file_wine_to_w32_result"; then + if test -z "$func_convert_core_path_wine_to_w32_result"; then + func_convert_core_path_wine_to_w32_result=$func_convert_core_file_wine_to_w32_result + else + func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result" + fi + fi + done + IFS=$oldIFS + fi +} +# end: func_convert_core_path_wine_to_w32 + + +# func_cygpath ARGS... +# Wrapper around calling the cygpath program via LT_CYGPATH. This is used when +# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2) +# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin. 
# In case (1) or (2), returns the Cygwin file name or path in
# func_cygpath_result (input file name or path is assumed to be in w32
# format, as previously converted from $build's *nix or MSYS format).
# In case (3), returns the w32 file name or path in func_cygpath_result
# (input file name or path is assumed to be in Cygwin format).  Returns
# an empty string on error.
#
# ARGS are passed to cygpath, with the last one being the file name or path to
# be converted.
#
# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH
# environment variable; do not put it in $PATH.
func_cygpath ()
{
  $debug_cmd

  if test -n "$LT_CYGPATH" && test -f "$LT_CYGPATH"; then
    func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null`
    if test "$?" -ne 0; then
      # on failure, ensure result is empty
      func_cygpath_result=
    fi
  else
    # No usable cygpath: report it and leave the result empty.
    func_cygpath_result=
    func_error "LT_CYGPATH is empty or specifies non-existent file: '$LT_CYGPATH'"
  fi
}
#end: func_cygpath


# func_convert_core_msys_to_w32 ARG
# Convert file name or path ARG from MSYS format to w32 format.  Return
# result in func_convert_core_msys_to_w32_result.
func_convert_core_msys_to_w32 ()
{
  $debug_cmd

  # awkward: cmd appends spaces to result
  # The first sed expression trims that trailing padding; the second
  # applies $sed_naive_backslashify.
  # NOTE(review): whitespace inside the bracket expression may have been
  # squeezed in this copy of the file -- confirm against upstream libtool.
  func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null |
    $SED -e 's/[ ]*$//' -e "$sed_naive_backslashify"`
}
#end: func_convert_core_msys_to_w32


# func_convert_file_check ARG1 ARG2
# Verify that ARG1 (a file name in $build format) was converted to $host
# format in ARG2.  Otherwise (ARG2 empty although ARG1 is not), emit an
# error message, but continue (resetting func_to_host_file_result to ARG1).
func_convert_file_check ()
{
  $debug_cmd

  if test -z "$2" && test -n "$1"; then
    func_error "Could not determine host file name corresponding to"
    func_error " '$1'"
    func_error "Continuing, but uninstalled executables may not work."
    # Fallback:
    func_to_host_file_result=$1
  fi
}
# end func_convert_file_check


# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH
# Verify that FROM_PATH (a path in $build format) was converted to $host
# format in TO_PATH.  Otherwise (TO_PATH empty although FROM_PATH is not),
# emit an error message, but continue, resetting func_to_host_path_result
# to a simplistic fallback value (see below).
func_convert_path_check ()
{
  $debug_cmd

  if test -z "$4" && test -n "$3"; then
    func_error "Could not determine the host path corresponding to"
    func_error " '$3'"
    func_error "Continuing, but uninstalled executables may not work."
    # Fallback.  This is a deliberately simplistic "conversion" and
    # should not be "improved".  See libtool.info.
    if test "x$1" != "x$2"; then
      # Separators differ: swap every FROM_PATHSEP for TO_PATHSEP and
      # leave the directory names themselves untouched.
      lt_replace_pathsep_chars="s|$1|$2|g"
      func_to_host_path_result=`echo "$3" |
        $SED -e "$lt_replace_pathsep_chars"`
    else
      func_to_host_path_result=$3
    fi
  fi
}
# end func_convert_path_check


# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG
# Modifies func_to_host_path_result by prepending REPL if ORIG matches FRONTPAT
# and appending REPL if ORIG matches BACKPAT.  FRONTPAT and BACKPAT are used
# unquoted as 'case' patterns, so both may match the same ORIG.
func_convert_path_front_back_pathsep ()
{
  $debug_cmd

  case $4 in
  $1 ) func_to_host_path_result=$3$func_to_host_path_result
    ;;
  esac
  case $4 in
  $2 ) func_append func_to_host_path_result "$3"
    ;;
  esac
}
# end func_convert_path_front_back_pathsep


##################################################
# $build to $host FILE NAME CONVERSION FUNCTIONS #
##################################################
# invoked via '$to_host_file_cmd ARG'
#
# In each case, ARG is the path to be converted from $build to $host format.
# Result will be available in $func_to_host_file_result.


# func_to_host_file ARG
# Converts the file name ARG from $build format to $host format. Return result
# in func_to_host_file_result.
func_to_host_file ()
{
  $debug_cmd

  # Dispatch through the function "pointer" chosen for this build/host pair.
  $to_host_file_cmd "$1"
}
# end func_to_host_file


# func_to_tool_file ARG LAZY
# Translate file name ARG from $build format into the format expected by
# the toolchain and leave it in func_to_tool_file_result.  LAZY is a
# comma-separated list of conversion function names; when the active
# conversion ($to_tool_file_cmd) is on that list, ARG is passed through
# unconverted.
func_to_tool_file ()
{
  $debug_cmd

  case ,$2, in
    *,"$to_tool_file_cmd",*)
      # Listed as lazy: hand ARG back untouched.
      func_to_tool_file_result=$1
      return 0
      ;;
  esac

  # The conversion functions report through func_to_host_file_result.
  $to_tool_file_cmd "$1"
  func_to_tool_file_result=$func_to_host_file_result
}
# end func_to_tool_file


# func_convert_file_noop ARG
# Identity conversion: func_to_host_file_result becomes ARG.
func_convert_file_noop ()
{
  func_to_host_file_result=$1
}
# end func_convert_file_noop


# func_convert_file_msys_to_w32 ARG
# Turn the (mingw) MSYS file name ARG into (mingw) w32 form; the automatic
# MSYS conversion is not available inside the cwrapper, so it is done here.
# The answer is left in func_to_host_file_result.
func_convert_file_msys_to_w32 ()
{
  $debug_cmd

  case $1 in
    '')
      # Nothing to convert.
      func_to_host_file_result=$1
      ;;
    *)
      func_convert_core_msys_to_w32 "$1"
      func_to_host_file_result=$func_convert_core_msys_to_w32_result
      ;;
  esac
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_msys_to_w32


# func_convert_file_cygwin_to_w32 ARG
# Turn the Cygwin file name ARG into w32 form and leave it in
# func_to_host_file_result.
func_convert_file_cygwin_to_w32 ()
{
  $debug_cmd

  case $1 in
    '')
      func_to_host_file_result=$1
      ;;
    *)
      # $build is cygwin here, so "the" cygpath found in $PATH is the right
      # one; LT_CYGPATH is not needed.
      func_to_host_file_result=`cygpath -m "$1"`
      ;;
  esac
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_cygwin_to_w32


# func_convert_file_nix_to_w32 ARG
# Convert file name ARG from *nix to w32 format.  Requires a wine environment
# and a working winepath.  Returns result in func_to_host_file_result.
func_convert_file_nix_to_w32 ()
{
  $debug_cmd

  case $1 in
    '')
      # Nothing to convert.
      func_to_host_file_result=$1
      ;;
    *)
      func_convert_core_file_wine_to_w32 "$1"
      func_to_host_file_result=$func_convert_core_file_wine_to_w32_result
      ;;
  esac
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_nix_to_w32


# func_convert_file_msys_to_cygwin ARG
# Turn the MSYS file name ARG into Cygwin form; LT_CYGPATH must be set.
# The answer is left in func_to_host_file_result.
func_convert_file_msys_to_cygwin ()
{
  $debug_cmd

  case $1 in
    '')
      func_to_host_file_result=$1
      ;;
    *)
      # Two hops: MSYS -> w32, then w32 -> Cygwin via cygpath.
      func_convert_core_msys_to_w32 "$1"
      func_cygpath -u "$func_convert_core_msys_to_w32_result"
      func_to_host_file_result=$func_cygpath_result
      ;;
  esac
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_msys_to_cygwin


# func_convert_file_nix_to_cygwin ARG
# Turn the *nix file name ARG into Cygwin form.  Needs Cygwin installed in
# a wine environment, a working winepath, and LT_CYGPATH set.  The answer
# is left in func_to_host_file_result.
func_convert_file_nix_to_cygwin ()
{
  $debug_cmd

  case $1 in
    '')
      func_to_host_file_result=$1
      ;;
    *)
      # Two hops: *nix -> w32 through wine, then w32 -> Cygwin via cygpath.
      func_convert_core_file_wine_to_w32 "$1"
      func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
      func_to_host_file_result=$func_cygpath_result
      ;;
  esac
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_nix_to_cygwin


#############################################
# $build to $host PATH CONVERSION FUNCTIONS #
#############################################
# invoked via '$to_host_path_cmd ARG'
#
# In each case, ARG is the path to be converted from $build to $host format.
# The result will be available in $func_to_host_path_result.
#
# Path separators are also converted from $build format to $host format.
# If ARG begins or ends with a path separator character, it is preserved
# (but converted to $host format) on output.
#
# All path conversion functions are named using the following convention:
#   file name conversion function    : func_convert_file_X_to_Y ()
#   path conversion function         : func_convert_path_X_to_Y ()
# where, for any given $build/$host combination the 'X_to_Y' value is the
# same.  If conversion functions are added for new $build/$host combinations,
# the two new functions must follow this pattern, or func_init_to_host_path_cmd
# will break.


# func_init_to_host_path_cmd
# Make sure the function "pointer" $to_host_path_cmd is populated.  Its
# value is derived from $to_host_file_cmd by swapping the
# 'func_convert_file_' prefix for 'func_convert_path_'; an already-set
# value is left alone.
to_host_path_cmd=
func_init_to_host_path_cmd ()
{
  $debug_cmd

  # Already initialised: nothing to do.
  test -z "$to_host_path_cmd" || return 0

  func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
  to_host_path_cmd=func_convert_path_$func_stripname_result
}


# func_to_host_path ARG
# Translate the path ARG from $build format to $host format and leave the
# answer in func_to_host_path_result.
func_to_host_path ()
{
  $debug_cmd

  func_init_to_host_path_cmd
  $to_host_path_cmd "$1"
}
# end func_to_host_path


# func_convert_path_noop ARG
# Identity conversion: func_to_host_path_result becomes ARG.
func_convert_path_noop ()
{
  func_to_host_path_result=$1
}
# end func_convert_path_noop


# func_convert_path_msys_to_w32 ARG
# Turn the (mingw) MSYS path ARG into (mingw) w32 form; the automatic MSYS
# conversion is not available inside the cwrapper, so it is done here.
# The answer is left in func_to_host_path_result.
func_convert_path_msys_to_w32 ()
{
  $debug_cmd

  func_to_host_path_result=$1
  # An empty path converts to itself.
  test -n "$1" || return 0

  # Strip a leading and a trailing path separator before converting.  MSYS
  # is inconsistent about them, cygpath turns them into '.;' and ';.', and
  # winepath drops them, so they are put back by hand at the end.
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result

  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
  func_to_host_path_result=$func_convert_core_msys_to_w32_result

  func_convert_path_check : ";" \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
}
# end func_convert_path_msys_to_w32


# func_convert_path_cygwin_to_w32 ARG
# Turn the Cygwin path ARG into w32 form and leave it in
# func_to_host_path_result.
func_convert_path_cygwin_to_w32 ()
{
  $debug_cmd

  func_to_host_path_result=$1
  test -n "$1" || return 0

  # Separator handling as in func_convert_path_msys_to_w32.
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result

  func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`

  func_convert_path_check : ";" \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
}
# end func_convert_path_cygwin_to_w32


# func_convert_path_nix_to_w32 ARG
# Turn the *nix path ARG into w32 form.  Needs a wine environment and a
# working winepath.  The answer is left in func_to_host_path_result.
func_convert_path_nix_to_w32 ()
{
  $debug_cmd

  func_to_host_path_result=$1
  test -n "$1" || return 0

  # Separator handling as in func_convert_path_msys_to_w32.
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result

  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
  func_to_host_path_result=$func_convert_core_path_wine_to_w32_result

  func_convert_path_check : ";" \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
}
# end func_convert_path_nix_to_w32


# func_convert_path_msys_to_cygwin ARG
# Convert path ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
# Returns result in func_to_host_path_result.
func_convert_path_msys_to_cygwin ()
{
  $debug_cmd

  func_to_host_path_result=$1
  # An empty path converts to itself.
  test -n "$1" || return 0

  # Separator handling as in func_convert_path_msys_to_w32: strip a leading
  # and a trailing ':' first, restore them after the conversion.
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result

  # Two hops: MSYS -> w32, then w32 -> Cygwin via cygpath.
  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
  func_cygpath -u -p "$func_convert_core_msys_to_w32_result"
  func_to_host_path_result=$func_cygpath_result

  func_convert_path_check : : \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" : "$1"
}
# end func_convert_path_msys_to_cygwin


# func_convert_path_nix_to_cygwin ARG
# Turn the *nix path ARG into Cygwin form.  Needs Cygwin installed in a
# wine environment, a working winepath, and LT_CYGPATH set.  The answer is
# left in func_to_host_path_result.
func_convert_path_nix_to_cygwin ()
{
  $debug_cmd

  func_to_host_path_result=$1
  test -n "$1" || return 0

  # Remove leading and trailing path separator characters from ARG.  msys
  # behavior is inconsistent here, cygpath turns them into '.;' and ';.',
  # and winepath ignores them completely.
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result

  # Two hops: *nix -> w32 through wine, then w32 -> Cygwin via cygpath.
  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
  func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result"
  func_to_host_path_result=$func_cygpath_result

  func_convert_path_check : : \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" : "$1"
}
# end func_convert_path_nix_to_cygwin


# func_dll_def_p FILE
# Succeed iff FILE is a Windows DLL '.def' file, judged by its first line
# that is neither blank nor a ';' comment: that line must consist of
# EXPORTS or LIBRARY, optionally followed by more text.
# Keep in sync with _LT_DLL_DEF_P in libtool.m4
# NOTE(review): whitespace inside the bracket expressions below may have
# been squeezed in this copy of the file -- confirm against upstream libtool.
func_dll_def_p ()
{
  $debug_cmd

  func_dll_def_p_tmp=`$SED -n -e 's/^[ ]*//' -e '/^\(;.*\)*$/d' \
    -e 's/^\(EXPORTS\|LIBRARY\)\([ ].*\)*$/DEF/p' -e q "$1"`

  case $func_dll_def_p_tmp in
    DEF) return 0 ;;
    *)   return 1 ;;
  esac
}


# func_mode_compile arg...
+func_mode_compile () +{ + $debug_cmd + + # Get the compilation command and the source file. + base_compile= + srcfile=$nonopt # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + pie_flag= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". Instead, add this to base_compile + lastarg=$arg + arg_mode=normal + ;; + + target ) + libobj=$arg + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + test -n "$libobj" && \ + func_fatal_error "you cannot specify '-o' more than once" + arg_mode=target + continue + ;; + + -pie | -fpie | -fPIE) + func_append pie_flag " $arg" + continue + ;; + + -shared | -static | -prefer-pic | -prefer-non-pic) + func_append later " $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + lastarg= + save_ifs=$IFS; IFS=, + for arg in $args; do + IFS=$save_ifs + func_append_quoted lastarg "$arg" + done + IFS=$save_ifs + func_stripname ' ' '' "$lastarg" + lastarg=$func_stripname_result + + # Add the arguments to base_compile. + func_append base_compile " $lastarg" + continue + ;; + + *) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg=$srcfile + srcfile=$arg + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + func_append_quoted base_compile "$lastarg" + done # for arg + + case $arg_mode in + arg) + func_fatal_error "you must specify an argument for -Xcompile" + ;; + target) + func_fatal_error "you must specify a target with '-o'" + ;; + *) + # Get the name of the library object. 
+ test -z "$libobj" && { + func_basename "$srcfile" + libobj=$func_basename_result + } + ;; + esac + + # Recognize several different file suffixes. + # If the user specifies -o file.o, it is replaced with file.lo + case $libobj in + *.[cCFSifmso] | \ + *.ada | *.adb | *.ads | *.asm | \ + *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ + *.[fF][09]? | *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) + func_xform "$libobj" + libobj=$func_xform_result + ;; + esac + + case $libobj in + *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; + *) + func_fatal_error "cannot determine name of library object from '$libobj'" + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -shared) + test yes = "$build_libtool_libs" \ + || func_fatal_configuration "cannot build a shared library" + build_old_libs=no + continue + ;; + + -static) + build_libtool_libs=no + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + func_quote_arg pretty "$libobj" + test "X$libobj" != "X$func_quote_arg_result" \ + && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && func_warning "libobj name '$libobj' may not contain shell special characters." + func_dirname_and_basename "$obj" "/" "" + objname=$func_basename_result + xdir=$func_dirname_result + lobj=$xdir$objdir/$objname + + test -z "$base_compile" && \ + func_fatal_help "you must specify a compilation command" + + # Delete any leftover library objects. 
+ if test yes = "$build_old_libs"; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2* | cegcc*) + pic_mode=default + ;; + esac + if test no = "$pic_mode" && test pass_all != "$deplibs_check_method"; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test no = "$compiler_c_o"; then + output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.$objext + lockfile=$output_obj.lock + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test yes = "$need_locks"; then + until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do + func_echo "Waiting for $lockfile to be removed" + sleep 2 + done + elif test warn = "$need_locks"; then + if test -f "$lockfile"; then + $ECHO "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
+ + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + func_append removelist " $output_obj" + $ECHO "$srcfile" > "$lockfile" + fi + + $opt_dry_run || $RM $removelist + func_append removelist " $lockfile" + trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 + + func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 + srcfile=$func_to_tool_file_result + func_quote_arg pretty "$srcfile" + qsrcfile=$func_quote_arg_result + + # Only build a PIC object if we are building libtool libraries. + if test yes = "$build_libtool_libs"; then + # Without this assignment, base_compile gets emptied. + fbsd_hideous_sh_bug=$base_compile + + if test no != "$pic_mode"; then + command="$base_compile $qsrcfile $pic_flag" + else + # Don't build PIC code + command="$base_compile $qsrcfile" + fi + + func_mkdir_p "$xdir$objdir" + + if test -z "$output_obj"; then + # Place PIC objects in $objdir + func_append command " -o $lobj" + fi + + func_show_eval_locale "$command" \ + 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' + + if test warn = "$need_locks" && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + func_show_eval '$MV "$output_obj" "$lobj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + + # Allow error messages only from the first compilation. 
+ if test yes = "$suppress_opt"; then + suppress_output=' >/dev/null 2>&1' + fi + fi + + # Only build a position-dependent object if we build old libraries. + if test yes = "$build_old_libs"; then + if test yes != "$pic_mode"; then + # Don't build PIC code + command="$base_compile $qsrcfile$pie_flag" + else + command="$base_compile $qsrcfile $pic_flag" + fi + if test yes = "$compiler_c_o"; then + func_append command " -o $obj" + fi + + # Suppress compiler output if we already did a PIC compilation. + func_append command "$suppress_output" + func_show_eval_locale "$command" \ + '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' + + if test warn = "$need_locks" && + test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then + $ECHO "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support '-c' and '-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $opt_dry_run || $RM $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + func_show_eval '$MV "$output_obj" "$obj"' \ + 'error=$?; $opt_dry_run || $RM $removelist; exit $error' + fi + fi + + $opt_dry_run || { + func_write_libtool_object "$libobj" "$objdir/$objname" "$objname" + + # Unlock the critical section if it was locked + if test no != "$need_locks"; then + removelist=$lockfile + $RM "$lockfile" + fi + } + + exit $EXIT_SUCCESS +} + +$opt_help || { + test compile = "$opt_mode" && func_mode_compile ${1+"$@"} +} + +func_mode_help () +{ + # We need to display help for each of the modes. + case $opt_mode in + "") + # Generic help is extracted from the usage comments + # at the start of this file. 
+ func_help + ;; + + clean) + $ECHO \ +"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + + compile) + $ECHO \ +"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -no-suppress do not suppress compiler output for multiple passes + -prefer-pic try to build PIC objects only + -prefer-non-pic try to build non-PIC objects only + -shared do not build a '.o' file suitable for static linking + -static only build a '.o' file suitable for static linking + -Wc,FLAG + -Xcompiler FLAG pass FLAG directly to the compiler + +COMPILE-COMMAND is a command to be used in creating a 'standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix '.c' with the +library object suffix, '.lo'." + ;; + + execute) + $ECHO \ +"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to '-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." 
+ ;; + + finish) + $ECHO \ +"Usage: $progname [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the '--dry-run' option if you just want to see what would be executed." + ;; + + install) + $ECHO \ +"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the 'install' or 'cp' program. + +The following components of INSTALL-COMMAND are treated specially: + + -inst-prefix-dir PREFIX-DIR Use PREFIX-DIR as a staging area for installation + +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + + link) + $ECHO \ +"Usage: $progname [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. 
+ +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -bindir BINDIR specify path to binaries directory (for systems where + libraries must be found in the PATH setting at runtime) + -dlopen FILE '-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE use a list of object files found in FILE to specify objects + -os2dllname NAME force a short DLL name on OS/2 (no effect on other OSes) + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -shared only do dynamic linking of libtool libraries + -shrext SUFFIX override the standard shared library file extension + -static do not do any dynamic linking of uninstalled libtool libraries + -static-libtool-libs + do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + -weak LIBNAME declare that the target provides the LIBNAME interface + -Wc,FLAG + -Xcompiler FLAG pass 
linker-specific FLAG directly to the compiler + -Wa,FLAG + -Xassembler FLAG pass linker-specific FLAG directly to the assembler + -Wl,FLAG + -Xlinker FLAG pass linker-specific FLAG directly to the linker + -XCClinker FLAG pass link-specific FLAG to the compiler driver (CC) + +All other options (arguments beginning with '-') are ignored. + +Every other argument is treated as a filename. Files ending in '.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in '.la', then a libtool library is created, +only library objects ('.lo' files) may be specified, and '-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in '.a' or '.lib', then a standard library is created +using 'ar' and 'ranlib', or on Windows using 'lib'. + +If OUTPUT-FILE ends in '.lo' or '.$objext', then a reloadable object file +is created, otherwise an executable program is created." + ;; + + uninstall) + $ECHO \ +"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically '/bin/rm'). RM-OPTIONS are options (such as '-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + + *) + func_fatal_help "invalid operation mode '$opt_mode'" + ;; + esac + + echo + $ECHO "Try '$progname --help' for more information about other modes." 
+} + +# Now that we've collected a possible --mode arg, show help if necessary +if $opt_help; then + if test : = "$opt_help"; then + func_mode_help + else + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + func_mode_help + done + } | $SED -n '1p; 2,$s/^Usage:/ or: /p' + { + func_help noexit + for opt_mode in compile link execute install finish uninstall clean; do + echo + func_mode_help + done + } | + $SED '1d + /^When reporting/,/^Report/{ + H + d + } + $x + /information about other modes/d + /more detailed .*MODE/d + s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/' + fi + exit $? +fi + + +# func_mode_execute arg... +func_mode_execute () +{ + $debug_cmd + + # The first argument is the command name. + cmd=$nonopt + test -z "$cmd" && \ + func_fatal_help "you must specify a COMMAND" + + # Handle -dlopen flags immediately. + for file in $opt_dlopen; do + test -f "$file" \ + || func_fatal_help "'$file' is not a file" + + dir= + case $file in + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$lib' is not a valid libtool archive" + + # Read the libtool library. + dlname= + library_names= + func_source "$file" + + # Skip this library if it cannot be dlopened. + if test -z "$dlname"; then + # Warn if it was a shared library. + test -n "$library_names" && \ + func_warning "'$file' was not linked with '-export-dynamic'" + continue + fi + + func_dirname "$file" "" "." + dir=$func_dirname_result + + if test -f "$dir/$objdir/$dlname"; then + func_append dir "/$objdir" + else + if test ! -f "$dir/$dlname"; then + func_fatal_error "cannot find '$dlname' in '$dir' or '$dir/$objdir'" + fi + fi + ;; + + *.lo) + # Just add the directory containing the .lo file. + func_dirname "$file" "" "." 
+ dir=$func_dirname_result + ;; + + *) + func_warning "'-dlopen' is ignored for non-libtool libraries and objects" + continue + ;; + esac + + # Get the absolute pathname. + absdir=`cd "$dir" && pwd` + test -n "$absdir" && dir=$absdir + + # Now add the directory to shlibpath_var. + if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic=$magic + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -* | *.la | *.lo ) ;; + *) + # Do a test to see if this is really a libtool program. + if func_ltwrapper_script_p "$file"; then + func_source "$file" + # Transform arg to wrapped name. + file=$progdir/$program + elif func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + func_source "$func_ltwrapper_scriptname_result" + # Transform arg to wrapped name. + file=$progdir/$program + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + func_append_quoted args "$file" + done + + if $opt_dry_run; then + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\"" + echo "export $shlibpath_var" + fi + $ECHO "$cmd$args" + exit $EXIT_SUCCESS + else + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES + do + eval "if test \"\${save_$lt_var+set}\" = set; then + $lt_var=\$save_$lt_var; export $lt_var + else + $lt_unset $lt_var + fi" + done + + # Now prepare to actually exec the command. + exec_cmd=\$cmd$args + fi +} + +test execute = "$opt_mode" && func_mode_execute ${1+"$@"} + + +# func_mode_finish arg... 
+func_mode_finish () +{ + $debug_cmd + + libs= + libdirs= + admincmds= + + for opt in "$nonopt" ${1+"$@"} + do + if test -d "$opt"; then + func_append libdirs " $opt" + + elif test -f "$opt"; then + if func_lalib_unsafe_p "$opt"; then + func_append libs " $opt" + else + func_warning "'$opt' is not a valid libtool archive" + fi + + else + func_fatal_error "invalid argument '$opt'" + fi + done + + if test -n "$libs"; then + if test -n "$lt_sysroot"; then + sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"` + sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;" + else + sysroot_cmd= + fi + + # Remove sysroot references + if $opt_dry_run; then + for lib in $libs; do + echo "removing references to $lt_sysroot and '=' prefixes from $lib" + done + else + tmpdir=`func_mktempdir` + for lib in $libs; do + $SED -e "$sysroot_cmd s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \ + > $tmpdir/tmp-la + mv -f $tmpdir/tmp-la $lib + done + ${RM}r "$tmpdir" + fi + fi + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + func_execute_cmds "$finish_cmds" 'admincmds="$admincmds +'"$cmd"'"' + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. + eval cmds=\"$finish_eval\" + $opt_dry_run || eval "$cmds" || func_append admincmds " + $cmds" + fi + done + fi + + # Exit here if they wanted silent mode. 
+ $opt_quiet && exit $EXIT_SUCCESS + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + echo "----------------------------------------------------------------------" + echo "Libraries have been installed in:" + for libdir in $libdirs; do + $ECHO " $libdir" + done + echo + echo "If you ever happen to want to link against installed libraries" + echo "in a given directory, LIBDIR, you must either use libtool, and" + echo "specify the full pathname of the library, or use the '-LLIBDIR'" + echo "flag during linking and do at least one of the following:" + if test -n "$shlibpath_var"; then + echo " - add LIBDIR to the '$shlibpath_var' environment variable" + echo " during execution" + fi + if test -n "$runpath_var"; then + echo " - add LIBDIR to the '$runpath_var' environment variable" + echo " during linking" + fi + if test -n "$hardcode_libdir_flag_spec"; then + libdir=LIBDIR + eval flag=\"$hardcode_libdir_flag_spec\" + + $ECHO " - use the '$flag' linker flag" + fi + if test -n "$admincmds"; then + $ECHO " - have your system administrator run these commands:$admincmds" + fi + if test -f /etc/ld.so.conf; then + echo " - have your system administrator add LIBDIR to '/etc/ld.so.conf'" + fi + echo + + echo "See any operating system documentation about shared libraries for" + case $host in + solaris2.[6789]|solaris2.1[0-9]) + echo "more information, such as the ld(1), crle(1) and ld.so(8) manual" + echo "pages." + ;; + *) + echo "more information, such as the ld(1) and ld.so(8) manual pages." + ;; + esac + echo "----------------------------------------------------------------------" + fi + exit $EXIT_SUCCESS +} + +test finish = "$opt_mode" && func_mode_finish ${1+"$@"} + + +# func_mode_install arg... +func_mode_install () +{ + $debug_cmd + + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). 
+ if test "$SHELL" = "$nonopt" || test /bin/sh = "$nonopt" || + # Allow the use of GNU shtool's install command. + case $nonopt in *shtool*) :;; *) false;; esac + then + # Aesthetically quote it. + func_quote_arg pretty "$nonopt" + install_prog="$func_quote_arg_result " + arg=$1 + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + func_quote_arg pretty "$arg" + func_append install_prog "$func_quote_arg_result" + install_shared_prog=$install_prog + case " $install_prog " in + *[\\\ /]cp\ *) install_cp=: ;; + *) install_cp=false ;; + esac + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=false + stripme= + no_mode=: + for arg + do + arg2= + if test -n "$dest"; then + func_append files " $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=: ;; + -f) + if $install_cp; then :; else + prev=$arg + fi + ;; + -g | -m | -o) + prev=$arg + ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + if test X-m = "X$prev" && test -n "$install_override_mode"; then + arg2=$install_override_mode + no_mode=false + fi + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. 
+ func_quote_arg pretty "$arg" + func_append install_prog " $func_quote_arg_result" + if test -n "$arg2"; then + func_quote_arg pretty "$arg2" + fi + func_append install_shared_prog " $func_quote_arg_result" + done + + test -z "$install_prog" && \ + func_fatal_help "you must specify an install program" + + test -n "$prev" && \ + func_fatal_help "the '$prev' option requires an argument" + + if test -n "$install_override_mode" && $no_mode; then + if $install_cp; then :; else + func_quote_arg pretty "$install_override_mode" + func_append install_shared_prog " -m $func_quote_arg_result" + fi + fi + + if test -z "$files"; then + if test -z "$dest"; then + func_fatal_help "no file or destination specified" + else + func_fatal_help "you must specify a destination" + fi + fi + + # Strip any trailing slash from the destination. + func_stripname '' '/' "$dest" + dest=$func_stripname_result + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=: + if $isdir; then + destdir=$dest + destname= + else + func_dirname_and_basename "$dest" "" "." + destdir=$func_dirname_result + destname=$func_basename_result + + # Not a directory, so check to see that there is only one file specified. + set dummy $files; shift + test "$#" -gt 1 && \ + func_fatal_help "'$dest' is not a directory" + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + func_fatal_help "'$destdir' must be an absolute directory name" + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. 
+ func_append staticlibs " $file" + ;; + + *.la) + func_resolve_sysroot "$file" + file=$func_resolve_sysroot_result + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$file" \ + || func_fatal_help "'$file' is not a valid libtool archive" + + library_names= + old_library= + relink_command= + func_source "$file" + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) func_append current_libdirs " $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) func_append future_libdirs " $libdir" ;; + esac + fi + + func_dirname "$file" "/" "" + dir=$func_dirname_result + func_append dir "$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + test "$inst_prefix_dir" = "$destdir" && \ + func_fatal_error "error: cannot install '$file' to a directory not ending in $libdir" + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + func_warning "relinking '$file'" + func_show_eval "$relink_command" \ + 'func_fatal_error "error: relink '\''$file'\'' with the above command before installing it"' + fi + + # See the names of the shared library. 
+ set dummy $library_names; shift + if test -n "$1"; then + realname=$1 + shift + + srcname=$realname + test -n "$relink_command" && srcname=${realname}T + + # Install the shared library and build the symlinks. + func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \ + 'exit $?' + tstripme=$stripme + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + case $realname in + *.dll.a) + tstripme= + ;; + esac + ;; + os2*) + case $realname in + *_dll.a) + tstripme= + ;; + esac + ;; + esac + if test -n "$tstripme" && test -n "$striplib"; then + func_show_eval "$striplib $destdir/$realname" 'exit $?' + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try 'ln -sf' first, because the 'ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + test "$linkname" != "$realname" \ + && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })" + done + fi + + # Do each command in the postinstall commands. + lib=$destdir/$realname + func_execute_cmds "$postinstall_cmds" 'exit $?' + fi + + # Install the pseudo-library for information purposes. + func_basename "$file" + name=$func_basename_result + instname=$dir/${name}i + func_show_eval "$install_prog $instname $destdir/$name" 'exit $?' + + # Maybe install the static library, too. + test -n "$old_library" && func_append staticlibs " $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # Deduce the name of the destination old-style object file. 
+ case $destfile in + *.lo) + func_lo2o "$destfile" + staticdest=$func_lo2o_result + ;; + *.$objext) + staticdest=$destfile + destfile= + ;; + *) + func_fatal_help "cannot copy a libtool object to '$destfile'" + ;; + esac + + # Install the libtool object if requested. + test -n "$destfile" && \ + func_show_eval "$install_prog $file $destfile" 'exit $?' + + # Install the old object if enabled. + if test yes = "$build_old_libs"; then + # Deduce the name of the old-style object file. + func_lo2o "$file" + staticobj=$func_lo2o_result + func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?' + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile=$destdir/$destname + else + func_basename "$file" + destfile=$func_basename_result + destfile=$destdir/$destfile + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext= + case $file in + *.exe) + if test ! -f "$file"; then + func_stripname '' '.exe' "$file" + file=$func_stripname_result + stripped_ext=.exe + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin* | *mingw*) + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + wrapper=$func_ltwrapper_scriptname_result + else + func_stripname '' '.exe' "$file" + wrapper=$func_stripname_result + fi + ;; + *) + wrapper=$file + ;; + esac + if func_ltwrapper_script_p "$wrapper"; then + notinst_deplibs= + relink_command= + + func_source "$wrapper" + + # Check the variables that should have been set. + test -z "$generated_by_libtool_version" && \ + func_fatal_error "invalid libtool wrapper script '$wrapper'" + + finalize=: + for lib in $notinst_deplibs; do + # Check to see that each library is installed. 
+ libdir= + if test -f "$lib"; then + func_source "$lib" + fi + libfile=$libdir/`$ECHO "$lib" | $SED 's%^.*/%%g'` + if test -n "$libdir" && test ! -f "$libfile"; then + func_warning "'$lib' has not been installed in '$libdir'" + finalize=false + fi + done + + relink_command= + func_source "$wrapper" + + outputname= + if test no = "$fast_install" && test -n "$relink_command"; then + $opt_dry_run || { + if $finalize; then + tmpdir=`func_mktempdir` + func_basename "$file$stripped_ext" + file=$func_basename_result + outputname=$tmpdir/$file + # Replace the output file specification. + relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'` + + $opt_quiet || { + func_quote_arg expand,pretty "$relink_command" + eval "func_echo $func_quote_arg_result" + } + if eval "$relink_command"; then : + else + func_error "error: relink '$file' with the above command before installing it" + $opt_dry_run || ${RM}r "$tmpdir" + continue + fi + file=$outputname + else + func_warning "cannot relink '$file'" + fi + } + else + # Install the binary that we compiled earlier. + file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + func_stripname '' '.exe' "$destfile" + destfile=$func_stripname_result + ;; + esac + ;; + esac + func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?' + $opt_dry_run || if test -n "$outputname"; then + ${RM}r "$tmpdir" + fi + ;; + esac + done + + for file in $staticlibs; do + func_basename "$file" + name=$func_basename_result + + # Set up the ranlib parameters. + oldlib=$destdir/$name + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + + func_show_eval "$install_prog \$file \$oldlib" 'exit $?' 
+ + if test -n "$stripme" && test -n "$old_striplib"; then + func_show_eval "$old_striplib $tool_oldlib" 'exit $?' + fi + + # Do each command in the postinstall commands. + func_execute_cmds "$old_postinstall_cmds" 'exit $?' + done + + test -n "$future_libdirs" && \ + func_warning "remember to run '$progname --finish$future_libdirs'" + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + $opt_dry_run && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL "$progpath" $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi +} + +test install = "$opt_mode" && func_mode_install ${1+"$@"} + + +# func_generate_dlsyms outputname originator pic_p +# Extract symbols from dlprefiles and create ${outputname}S.o with +# a dlpreopen symbol table. +func_generate_dlsyms () +{ + $debug_cmd + + my_outputname=$1 + my_originator=$2 + my_pic_p=${3-false} + my_prefix=`$ECHO "$my_originator" | $SED 's%[^a-zA-Z0-9]%_%g'` + my_dlsyms= + + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + my_dlsyms=${my_outputname}S.c + else + func_error "not configured to extract global symbols from dlpreopened files" + fi + fi + + if test -n "$my_dlsyms"; then + case $my_dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist=$output_objdir/$my_outputname.nm + + func_show_eval "$RM $nlist ${nlist}S ${nlist}T" + + # Parse the name list into a source file. + func_verbose "creating $output_objdir/$my_dlsyms" + + $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ +/* $my_dlsyms - symbol resolution table for '$my_outputname' dlsym emulation. 
*/ +/* Generated by $PROGRAM (GNU $PACKAGE) $VERSION */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +#if defined __GNUC__ && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) +#pragma GCC diagnostic ignored \"-Wstrict-prototypes\" +#endif + +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT_DLSYM_CONST +#else +# define LT_DLSYM_CONST const +#endif + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* External symbol declarations for the compiler. */\ +" + + if test yes = "$dlself"; then + func_verbose "generating symbol list for '$output'" + + $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" + + # Add our own program objects to the symbol list. 
+ progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` + for progfile in $progfiles; do + func_to_tool_file "$progfile" func_convert_file_msys_to_w32 + func_verbose "extracting global C symbols from '$func_to_tool_file_result'" + $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" + done + + if test -n "$exclude_expsyms"; then + $opt_dry_run || { + eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + if test -n "$export_symbols_regex"; then + $opt_dry_run || { + eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + } + fi + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + export_symbols=$output_objdir/$outputname.exp + $opt_dry_run || { + $RM $export_symbols + eval "$SED -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' + ;; + esac + } + else + $opt_dry_run || { + eval "$SED -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' + eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' + eval '$MV "$nlist"T "$nlist"' + case $host in + *cygwin* | *mingw* | *cegcc* ) + eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' + eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' + ;; + esac + } + fi + fi + + for dlprefile in $dlprefiles; do + func_verbose "extracting global C symbols from '$dlprefile'" + func_basename "$dlprefile" + name=$func_basename_result + case $host in + *cygwin* | *mingw* | *cegcc* ) + # if an import library, we need to obtain dlname + if func_win32_import_lib_p "$dlprefile"; then + func_tr_sh "$dlprefile" + eval "curr_lafile=\$libfile_$func_tr_sh_result" + dlprefile_dlbasename= + if test 
-n "$curr_lafile" && func_lalib_p "$curr_lafile"; then + # Use subshell, to avoid clobbering current variable values + dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` + if test -n "$dlprefile_dlname"; then + func_basename "$dlprefile_dlname" + dlprefile_dlbasename=$func_basename_result + else + # no lafile. user explicitly requested -dlpreopen . + $sharedlib_from_linklib_cmd "$dlprefile" + dlprefile_dlbasename=$sharedlib_from_linklib_result + fi + fi + $opt_dry_run || { + if test -n "$dlprefile_dlbasename"; then + eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' + else + func_warning "Could not compute DLL name from $name" + eval '$ECHO ": $name " >> "$nlist"' + fi + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | + $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" + } + else # not an import lib + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + fi + ;; + *) + $opt_dry_run || { + eval '$ECHO ": $name " >> "$nlist"' + func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 + eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" + } + ;; + esac + done + + $opt_dry_run || { + # Make sure we have at least an empty file. + test -f "$nlist" || : > "$nlist" + + if test -n "$exclude_expsyms"; then + $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T + $MV "$nlist"T "$nlist" + fi + + # Try sorting and uniquifying the output. 
+ if $GREP -v "^: " < "$nlist" | + if sort -k 3 /dev/null 2>&1; then + sort -k 3 + else + sort +2 + fi | + uniq > "$nlist"S; then + : + else + $GREP -v "^: " < "$nlist" > "$nlist"S + fi + + if test -f "$nlist"S; then + eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' + else + echo '/* NONE */' >> "$output_objdir/$my_dlsyms" + fi + + func_show_eval '$RM "${nlist}I"' + if test -n "$global_symbol_to_import"; then + eval "$global_symbol_to_import"' < "$nlist"S > "$nlist"I' + fi + + echo >> "$output_objdir/$my_dlsyms" "\ + +/* The mapping between symbol names and symbols. */ +typedef struct { + const char *name; + void *address; +} lt_dlsymlist; +extern LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[];\ +" + + if test -s "$nlist"I; then + echo >> "$output_objdir/$my_dlsyms" "\ +static void lt_syminit(void) +{ + LT_DLSYM_CONST lt_dlsymlist *symbol = lt_${my_prefix}_LTX_preloaded_symbols; + for (; symbol->name; ++symbol) + {" + $SED 's/.*/ if (STREQ (symbol->name, \"&\")) symbol->address = (void *) \&&;/' < "$nlist"I >> "$output_objdir/$my_dlsyms" + echo >> "$output_objdir/$my_dlsyms" "\ + } +}" + fi + echo >> "$output_objdir/$my_dlsyms" "\ +LT_DLSYM_CONST lt_dlsymlist +lt_${my_prefix}_LTX_preloaded_symbols[] = +{ {\"$my_originator\", (void *) 0}," + + if test -s "$nlist"I; then + echo >> "$output_objdir/$my_dlsyms" "\ + {\"@INIT@\", (void *) <_syminit}," + fi + + case $need_lib_prefix in + no) + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + *) + eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" + ;; + esac + echo >> "$output_objdir/$my_dlsyms" "\ + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_${my_prefix}_LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + } # !$opt_dry_run + + pic_flag_for_symtable= + case 
"$compile_command " in + *" -static "*) ;; + *) + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. + *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; + *-*-hpux*) + pic_flag_for_symtable=" $pic_flag" ;; + *) + $my_pic_p && pic_flag_for_symtable=" $pic_flag" + ;; + esac + ;; + esac + symtab_cflags= + for arg in $LTCFLAGS; do + case $arg in + -pie | -fpie | -fPIE) ;; + *) func_append symtab_cflags " $arg" ;; + esac + done + + # Now compile the dynamic symbol file. + func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' + + # Clean up the generated files. + func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T" "${nlist}I"' + + # Transform the symbol file into the correct name. 
+ symfileobj=$output_objdir/${my_outputname}S.$objext + case $host in + *cygwin* | *mingw* | *cegcc* ) + if test -f "$output_objdir/$my_outputname.def"; then + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` + else + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + fi + ;; + *) + compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` + ;; + esac + ;; + *) + func_fatal_error "unknown suffix for '$my_dlsyms'" + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` + finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` + fi +} + +# func_cygming_gnu_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is a GNU/binutils-style import library. Returns +# with nonzero status (FALSE) otherwise. +func_cygming_gnu_implib_p () +{ + $debug_cmd + + func_to_tool_file "$1" func_convert_file_msys_to_w32 + func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'` + test -n "$func_cygming_gnu_implib_tmp" +} + +# func_cygming_ms_implib_p ARG +# This predicate returns with zero status (TRUE) if +# ARG is an MS-style import library. Returns +# with nonzero status (FALSE) otherwise. 
+func_cygming_ms_implib_p ()
+{
+    $debug_cmd
+
+    # Convert ARG to the tool's file-name convention, list its symbols with
+    # $NM, normalise them through $global_symbol_pipe, and keep only lines
+    # mentioning _NULL_IMPORT_DESCRIPTOR.  A non-empty result means TRUE.
+    func_to_tool_file "$1" func_convert_file_msys_to_w32
+    func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'`
+    test -n "$func_cygming_ms_implib_tmp"
+}
+
+# func_win32_libid arg
+# return the library type of file 'arg'
+#
+# Need a lot of goo to handle *both* DLLs and import libs
+# Has to be a shell function in order to 'eat' the argument
+# that is supplied when $file_magic_command is called.
+# Despite the name, also deal with 64 bit binaries.
+#
+# The type is written to stdout as one of:
+#   "x86 archive import", "x86 archive static", "x86 DLL", "unknown"
+# It is also left in the variable $win32_libid_type.
+#
+# ARG is always passed on as a single quoted word so that a path
+# containing whitespace is not split into several arguments.
+func_win32_libid ()
+{
+    $debug_cmd
+
+    win32_libid_type=unknown
+    # First classification step: what does file(1) say about ARG?
+    win32_fileres=`file -L "$1" 2>/dev/null`
+    case $win32_fileres in
+    *ar\ archive\ import\ library*) # definitely import
+      win32_libid_type="x86 archive import"
+      ;;
+    *ar\ archive*) # could be an import, or static
+      # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD.
+      # \"\$1\" is expanded by eval itself, so the path stays one word.
+      # NOTE(review): the 2>/dev/null below is attached to $SED, not to
+      # $OBJDUMP, so objdump diagnostics are not silenced -- left as is.
+      if eval $OBJDUMP -f \"\$1\" | $SED -e '10q' 2>/dev/null |
+        $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then
+        case $nm_interface in
+        "MS dumpbin")
+          # dumpbin cannot produce the posix listing used below, so fall
+          # back on the two import-library predicates.
+          if func_cygming_ms_implib_p "$1" ||
+             func_cygming_gnu_implib_p "$1"
+          then
+            win32_nmres=import
+          else
+            win32_nmres=
+          fi
+          ;;
+        *)
+          # Look for an ' I ' symbol type within the first 100 lines of
+          # the nm output; print "import" and stop at the first hit.
+          func_to_tool_file "$1" func_convert_file_msys_to_w32
+          win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" |
+            $SED -n -e '
+            1,100{
+                / I /{
+                    s|.*|import|
+                    p
+                    q
+                }
+            }'`
+          ;;
+        esac
+        case $win32_nmres in
+        import*)  win32_libid_type="x86 archive import";;
+        *)        win32_libid_type="x86 archive static";;
+        esac
+      fi
+      ;;
+    *DLL*)
+      win32_libid_type="x86 DLL"
+      ;;
+    *executable*) # but shell scripts are "executable" too...
+      case $win32_fileres in
+      *MS\ Windows\ PE\ Intel*)
+        win32_libid_type="x86 DLL"
+        ;;
+      esac
+      ;;
+    esac
+    $ECHO "$win32_libid_type"
+}
+
+# func_cygming_dll_for_implib ARG
+#
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+# Invoked by eval'ing the libtool variable
+#    $sharedlib_from_linklib_cmd
+# Result is available in the variable
+#    $sharedlib_from_linklib_result
+func_cygming_dll_for_implib ()
+{
+    $debug_cmd
+
+    sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"`
+}
+
+# func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs
+#
+# This is the core of a fallback implementation of a
+# platform-specific function to extract the name of the
+# DLL associated with the specified import library LIBNAME.
+#
+# SECTION_NAME is either .idata$6 or .idata$7, depending
+# on the platform and compiler that created the implib.
+#
+# Echoes the name of the DLL associated with the
+# specified import library.
+#
+# Portability note: the optional 'i' in "pe[i]-" is written with the
+# interval \{0,1\}.  POSIX basic regular expressions require an explicit
+# lower bound; the bound-less form \{,1\} is not defined by POSIX.
+func_cygming_dll_for_implib_fallback_core ()
+{
+    $debug_cmd
+
+    # Escape SECTION_NAME so it can be embedded literally in a sed regex.
+    match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"`
+    $OBJDUMP -s --section "$1" "$2" 2>/dev/null |
+      $SED '/^Contents of section '"$match_literal"':/{
+        # Place marker at beginning of archive member dllname section
+        s/.*/====MARK====/
+        p
+        d
+      }
+      # These lines can sometimes be longer than 43 characters, but
+      # are always uninteresting
+      /:[ ]*file format pe[i]\{0,1\}-/d
+      /^In archive [^:]*:/d
+      # Ensure marker is printed
+      /^====MARK====/p
+      # Remove all lines with less than 43 characters
+      /^.\{43\}/!d
+      # From remaining lines, remove first 43 characters
+      s/^.\{43\}//' |
+      $SED -n '
+        # Join marker and all lines until next marker into a single line
+        /^====MARK====/ b para
+        H
+        $ b para
+        b
+        :para
+        x
+        s/\n//g
+        # Remove the marker
+        s/^====MARK====//
+        # Remove trailing dots and whitespace
+        s/[\. \t]*$//
+        # Print
+        /./p' |
+      # we now have a list, one entry per line, of the stringified
+      # contents of the appropriate section of all members of the
+      # archive that possess that section. Heuristic: eliminate
+      # all those that have a first or second character that is
+      # a '.' (that is, objdump's representation of an unprintable
+      # character.) This should work for all archives with less than
+      # 0x302f exports -- but will fail for DLLs whose name actually
+      # begins with a literal '.' or a single character followed by
+      # a '.'.
+      #
+      # Of those that remain, print the first one.
+      $SED -e '/^\./d;/^.\./d;q'
+}
+
+# func_cygming_dll_for_implib_fallback ARG
+# Platform-specific function to extract the
+# name of the DLL associated with the specified
+# import library ARG.
+#
+# This fallback implementation is for use when $DLLTOOL
+# does not support the --identify-strict option.
+# Invoked by eval'ing the libtool variable
+#    $sharedlib_from_linklib_cmd
+# Result is available in the variable
+#    $sharedlib_from_linklib_result
+func_cygming_dll_for_implib_fallback ()
+{
+    $debug_cmd
+
+    if func_cygming_gnu_implib_p "$1"; then
+      # binutils import library
+      sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"`
+    elif func_cygming_ms_implib_p "$1"; then
+      # ms-generated import library
+      sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"`
+    else
+      # unknown
+      sharedlib_from_linklib_result=
+    fi
+}
+
+
+# func_extract_an_archive dir oldlib
+#
+# Extract every member of the archive OLDLIB into directory DIR by
+# running '$AR x' from inside DIR.
+#
+# When $lock_old_archive_extraction is 'yes', a lock file OLDLIB.lock is
+# created first (as a hard link to $progpath, retried every 2 seconds
+# until it succeeds) and removed again afterwards, including when the
+# extraction command fails.
+#
+# After extraction the member list is checked for duplicates; if the
+# sorted output of '$AR t' is not unique, this is a fatal error.
+func_extract_an_archive ()
+{
+    $debug_cmd
+
+    f_ex_an_ar_dir=$1; shift
+    f_ex_an_ar_oldlib=$1
+    if test yes = "$lock_old_archive_extraction"; then
+      lockfile=$f_ex_an_ar_oldlib.lock
+      until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
+        func_echo "Waiting for $lockfile to be removed"
+        sleep 2
+      done
+    fi
+    # On failure: remember the exit status, drop the lock, then exit with it.
+    func_show_eval "(cd \$f_ex_an_ar_dir && $AR x \"\$f_ex_an_ar_oldlib\")" \
+                   'stat=$?; rm -f "$lockfile"; exit $stat'
+    if test yes = "$lock_old_archive_extraction"; then
+      $opt_dry_run || rm -f "$lockfile"
+    fi
+    # 'sort -uc' fails when two members share a name, i.e. when the second
+    # extracted object would have overwritten the first.
+    if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
+     :
+    else
+      func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
+    fi
+}
+
+
+# func_extract_archives gentop oldlib ...
+func_extract_archives () +{ + $debug_cmd + + my_gentop=$1; shift + my_oldlibs=${1+"$@"} + my_oldobjs= + my_xlib= + my_xabs= + my_xdir= + + for my_xlib in $my_oldlibs; do + # Extract the objects. + case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs=$my_xlib ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + func_basename "$my_xlib" + my_xlib=$func_basename_result + my_xlib_u=$my_xlib + while :; do + case " $extracted_archives " in + *" $my_xlib_u "*) + func_arith $extracted_serial + 1 + extracted_serial=$func_arith_result + my_xlib_u=lt$extracted_serial-$my_xlib ;; + *) break ;; + esac + done + extracted_archives="$extracted_archives $my_xlib_u" + my_xdir=$my_gentop/$my_xlib_u + + func_mkdir_p "$my_xdir" + + case $host in + *-darwin*) + func_verbose "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + $opt_dry_run || { + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? + darwin_archive=$my_xabs + darwin_curdir=`pwd` + func_basename "$darwin_archive" + darwin_base_archive=$func_basename_result + darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true` + if test -n "$darwin_arches"; then + darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + func_verbose "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches; do + func_mkdir_p "unfat-$$/$darwin_base_archive-$darwin_arch" + $LIPO -thin $darwin_arch -output "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" "$darwin_archive" + cd "unfat-$$/$darwin_base_archive-$darwin_arch" + func_extract_an_archive "`pwd`" "$darwin_base_archive" + cd "$darwin_curdir" + $RM "unfat-$$/$darwin_base_archive-$darwin_arch/$darwin_base_archive" + done # $darwin_arches + ## Okay now we've a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$sed_basename" | sort -u` + darwin_file= + darwin_files= + for 
darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | sort | $NL2SP` + $LIPO -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + $RM -rf unfat-$$ + cd "$darwin_orig_dir" + else + cd $darwin_orig_dir + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + } # !$opt_dry_run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP` + done + + func_extract_archives_result=$my_oldobjs +} + + +# func_emit_wrapper [arg=no] +# +# Emit a libtool wrapper script on stdout. +# Don't directly open a file because we may want to +# incorporate the script contents within a cygwin/mingw +# wrapper executable. Must ONLY be called from within +# func_mode_link because it depends on a number of variables +# set therein. +# +# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR +# variable will take. If 'yes', then the emitted script +# will assume that the directory where it is stored is +# the $objdir directory. This is a cygwin/mingw-specific +# behavior. +func_emit_wrapper () +{ + func_emit_wrapper_arg1=${1-no} + + $ECHO "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='$sed_quote_subst' + +# Be Bourne compatible +if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. 
Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variables: + generated_by_libtool_version='$macro_version' + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$ECHO are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + file=\"\$0\"" + + func_quote_arg pretty "$ECHO" + qECHO=$func_quote_arg_result + $ECHO "\ + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$1 +_LTECHO_EOF' +} + ECHO=$qECHO + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ that is used only on +# windows platforms, and (c) all begin with the string "--lt-" +# (application programs are unlikely to have options that match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's $0 value, followed by "$@". +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=\$0 + shift + for lt_opt + do + case \"\$lt_opt\" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` + test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. 
+ lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` + cat \"\$lt_dump_D/\$lt_dump_F\" + exit 0 + ;; + --lt-*) + \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n \"\$lt_option_debug\"; then + echo \"$outputname:$output:\$LINENO: libtool wrapper (GNU $PACKAGE) $VERSION\" 1>&2 + fi +} + +# Used when --lt-debug. Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + \$ECHO \"$outputname:$output:\$LINENO: newargv[\$lt_dump_args_N]: \$lt_arg\" + lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2* | *-cegcc*) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir\\\\\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $ECHO "\ + if test -n \"\$lt_option_debug\"; then + \$ECHO \"$outputname:$output:\$LINENO: newargv[0]: \$progdir/\$program\" 1>&2 + func_lt_dump_args \${1+\"\$@\"} 1>&2 + fi + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $ECHO "\ + \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from \$@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + case \" \$* \" in + *\\ --lt-*) + for lt_wr_arg + do + case \$lt_wr_arg in + --lt-*) ;; + *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; + esac + shift + done ;; + esac + func_exec_program_core \${1+\"\$@\"} +} + + # Parse options + func_parse_lt_options \"\$0\" \${1+\"\$@\"} + + # Find the directory that this script lives in. 
+ thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 + if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then + # special case for '.' + if test \"\$thisdir\" = \".\"; then + thisdir=\`pwd\` + fi + # remove .libs from thisdir + case \"\$thisdir\" in + *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; + $objdir ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test yes = "$fast_install"; then + $ECHO "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | $SED 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! 
-d \"\$progdir\"; then + $MKDIR \"\$progdir\" + else + $RM \"\$progdir/\$file\" + fi" + + $ECHO "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + \$ECHO \"\$relink_command_output\" >&2 + $RM \"\$progdir/\$file\" + exit 1 + fi + fi + + $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $RM \"\$progdir/\$program\"; + $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $RM \"\$progdir/\$file\" + fi" + else + $ECHO "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $ECHO "\ + + if test -f \"\$progdir/\$program\"; then" + + # fixup the dll searchpath if we need to. + # + # Fix the DLL searchpath if we need to. Do this before prepending + # to shlibpath, because on Windows, both are PATH and uninstalled + # libraries must come first. + if test -n "$dllsearchpath"; then + $ECHO "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + # Export our shlibpath_var if we have one. + if test yes = "$shlibpath_overrides_runpath" && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $ECHO "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` + + export $shlibpath_var +" + fi + + $ECHO "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. + func_exec_program \${1+\"\$@\"} + fi + else + # The program doesn't exist. 
+ \$ECHO \"\$0: error: '\$progdir/\$program' does not exist\" 1>&2 + \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 + \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 + exit 1 + fi +fi\ +" +} + + +# func_emit_cwrapperexe_src +# emit the source code for a wrapper executable on stdout +# Must ONLY be called from within func_mode_link because +# it depends on a number of variable set therein. +func_emit_cwrapperexe_src () +{ + cat < +#include +#ifdef _MSC_VER +# include +# include +# include +#else +# include +# include +# ifdef __CYGWIN__ +# include +# endif +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#define STREQ(s1, s2) (strcmp ((s1), (s2)) == 0) + +/* declarations of non-ANSI functions */ +#if defined __MINGW32__ +# ifdef __STRICT_ANSI__ +int _putenv (const char *); +# endif +#elif defined __CYGWIN__ +# ifdef __STRICT_ANSI__ +char *realpath (const char *, char *); +int putenv (char *); +int setenv (const char *, const char *, int); +# endif +/* #elif defined other_platform || defined ... */ +#endif + +/* portability defines, excluding path handling macros */ +#if defined _MSC_VER +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +# define S_IXUSR _S_IEXEC +#elif defined __MINGW32__ +# define setmode _setmode +# define stat _stat +# define chmod _chmod +# define getcwd _getcwd +# define putenv _putenv +#elif defined __CYGWIN__ +# define HAVE_SETENV +# define FOPEN_WB "wb" +/* #elif defined other platforms ... 
*/ +#endif + +#if defined PATH_MAX +# define LT_PATHMAX PATH_MAX +#elif defined MAXPATHLEN +# define LT_PATHMAX MAXPATHLEN +#else +# define LT_PATHMAX 1024 +#endif + +#ifndef S_IXOTH +# define S_IXOTH 0 +#endif +#ifndef S_IXGRP +# define S_IXGRP 0 +#endif + +/* path handling portability macros */ +#ifndef DIR_SEPARATOR +# define DIR_SEPARATOR '/' +# define PATH_SEPARATOR ':' +#endif + +#if defined _WIN32 || defined __MSDOS__ || defined __DJGPP__ || \ + defined __OS2__ +# define HAVE_DOS_BASED_FILE_SYSTEM +# define FOPEN_WB "wb" +# ifndef DIR_SEPARATOR_2 +# define DIR_SEPARATOR_2 '\\' +# endif +# ifndef PATH_SEPARATOR_2 +# define PATH_SEPARATOR_2 ';' +# endif +#endif + +#ifndef DIR_SEPARATOR_2 +# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) +#else /* DIR_SEPARATOR_2 */ +# define IS_DIR_SEPARATOR(ch) \ + (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) +#endif /* DIR_SEPARATOR_2 */ + +#ifndef PATH_SEPARATOR_2 +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) +#else /* PATH_SEPARATOR_2 */ +# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) +#endif /* PATH_SEPARATOR_2 */ + +#ifndef FOPEN_WB +# define FOPEN_WB "w" +#endif +#ifndef _O_BINARY +# define _O_BINARY 0 +#endif + +#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) +#define XFREE(stale) do { \ + if (stale) { free (stale); stale = 0; } \ +} while (0) + +#if defined LT_DEBUGWRAPPER +static int lt_debug = 1; +#else +static int lt_debug = 0; +#endif + +const char *program_name = "libtool-wrapper"; /* in case xstrdup fails */ + +void *xmalloc (size_t num); +char *xstrdup (const char *string); +const char *base_name (const char *name); +char *find_executable (const char *wrapper); +char *chase_symlinks (const char *pathspec); +int make_executable (const char *path); +int check_executable (const char *path); +char *strendzap (char *str, const char *pat); +void lt_debugprintf (const char *file, int line, const char *fmt, ...); +void lt_fatal (const char *file, int line, const 
char *message, ...); +static const char *nonnull (const char *s); +static const char *nonempty (const char *s); +void lt_setenv (const char *name, const char *value); +char *lt_extend_str (const char *orig_value, const char *add, int to_end); +void lt_update_exe_path (const char *name, const char *value); +void lt_update_lib_path (const char *name, const char *value); +char **prepare_spawn (char **argv); +void lt_dump_script (FILE *f); +EOF + + cat <= 0) + && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return 1; + else + return 0; +} + +int +make_executable (const char *path) +{ + int rval = 0; + struct stat st; + + lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", + nonempty (path)); + if ((!path) || (!*path)) + return 0; + + if (stat (path, &st) >= 0) + { + rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); + } + return rval; +} + +/* Searches for the full path of the wrapper. Returns + newly allocated full path name if found, NULL otherwise + Does not chase symlinks, even on platforms that support them. +*/ +char * +find_executable (const char *wrapper) +{ + int has_slash = 0; + const char *p; + const char *p_next; + /* static buffer for getcwd */ + char tmp[LT_PATHMAX + 1]; + size_t tmp_len; + char *concat_name; + + lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", + nonempty (wrapper)); + + if ((wrapper == NULL) || (*wrapper == '\0')) + return NULL; + + /* Absolute path? 
*/ +#if defined HAVE_DOS_BASED_FILE_SYSTEM + if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } +#if defined HAVE_DOS_BASED_FILE_SYSTEM + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char *path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char *q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR (*q)) + break; + p_len = (size_t) (q - p); + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = + XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = + XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", + nonnull (strerror (errno))); + tmp_len = strlen (tmp); + concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if 
(check_executable (concat_name)) + return concat_name; + XFREE (concat_name); + return NULL; +} + +char * +chase_symlinks (const char *pathspec) +{ +#ifndef S_ISLNK + return xstrdup (pathspec); +#else + char buf[LT_PATHMAX]; + struct stat s; + char *tmp_pathspec = xstrdup (pathspec); + char *p; + int has_symlinks = 0; + while (strlen (tmp_pathspec) && !has_symlinks) + { + lt_debugprintf (__FILE__, __LINE__, + "checking path component for symlinks: %s\n", + tmp_pathspec); + if (lstat (tmp_pathspec, &s) == 0) + { + if (S_ISLNK (s.st_mode) != 0) + { + has_symlinks = 1; + break; + } + + /* search backwards for last DIR_SEPARATOR */ + p = tmp_pathspec + strlen (tmp_pathspec) - 1; + while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + p--; + if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) + { + /* no more DIR_SEPARATORS left */ + break; + } + *p = '\0'; + } + else + { + lt_fatal (__FILE__, __LINE__, + "error accessing file \"%s\": %s", + tmp_pathspec, nonnull (strerror (errno))); + } + } + XFREE (tmp_pathspec); + + if (!has_symlinks) + { + return xstrdup (pathspec); + } + + tmp_pathspec = realpath (pathspec, buf); + if (tmp_pathspec == 0) + { + lt_fatal (__FILE__, __LINE__, + "could not follow symlinks for %s", pathspec); + } + return xstrdup (tmp_pathspec); +#endif +} + +char * +strendzap (char *str, const char *pat) +{ + size_t len, patlen; + + assert (str != NULL); + assert (pat != NULL); + + len = strlen (str); + patlen = strlen (pat); + + if (patlen <= len) + { + str += len - patlen; + if (STREQ (str, pat)) + *str = '\0'; + } + return str; +} + +void +lt_debugprintf (const char *file, int line, const char *fmt, ...) 
+{ + va_list args; + if (lt_debug) + { + (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); + va_start (args, fmt); + (void) vfprintf (stderr, fmt, args); + va_end (args); + } +} + +static void +lt_error_core (int exit_status, const char *file, + int line, const char *mode, + const char *message, va_list ap) +{ + fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *file, int line, const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); + va_end (ap); +} + +static const char * +nonnull (const char *s) +{ + return s ? s : "(null)"; +} + +static const char * +nonempty (const char *s) +{ + return (s && !*s) ? "(empty)" : nonnull (s); +} + +void +lt_setenv (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_setenv) setting '%s' to '%s'\n", + nonnull (name), nonnull (value)); + { +#ifdef HAVE_SETENV + /* always make a copy, for consistency with !HAVE_SETENV */ + char *str = xstrdup (value); + setenv (name, str, 1); +#else + size_t len = strlen (name) + 1 + strlen (value) + 1; + char *str = XMALLOC (char, len); + sprintf (str, "%s=%s", name, value); + if (putenv (str) != EXIT_SUCCESS) + { + XFREE (str); + } +#endif + } +} + +char * +lt_extend_str (const char *orig_value, const char *add, int to_end) +{ + char *new_value; + if (orig_value && *orig_value) + { + size_t orig_value_len = strlen (orig_value); + size_t add_len = strlen (add); + new_value = XMALLOC (char, add_len + orig_value_len + 1); + if (to_end) + { + strcpy (new_value, orig_value); + strcpy (new_value + orig_value_len, add); + } + else + { + strcpy (new_value, add); + strcpy (new_value + add_len, orig_value); + } + } + else + { + new_value = xstrdup (add); + } + return new_value; +} + +void +lt_update_exe_path (const char *name, 
const char *value) +{ /* Prepend VALUE to the current setting of environment variable NAME, strip trailing path separators from the result and store it with lt_setenv. */ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); +/* nothing is changed unless NAME and VALUE are both non-empty strings */ + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); /* to_end is 0, so VALUE goes in front of the current setting */ + /* some systems can't cope with a ':'-terminated path #' */ + size_t len = strlen (new_value); + while ((len > 0) && IS_PATH_SEPARATOR (new_value[len-1])) + { + new_value[--len] = '\0'; + } + lt_setenv (name, new_value); + XFREE (new_value); /* lt_setenv works on its own copy, so the temporary can be released */ + } +} +/* Prepend VALUE to the current setting of environment variable NAME and store the result with lt_setenv. Unlike lt_update_exe_path, trailing path separators are left in place. Nothing is changed unless NAME and VALUE are both non-empty strings. */ +void +lt_update_lib_path (const char *name, const char *value) +{ + lt_debugprintf (__FILE__, __LINE__, + "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", + nonnull (name), nonnull (value)); + + if (name && *name && value && *value) + { + char *new_value = lt_extend_str (getenv (name), value, 0); /* to_end is 0, so VALUE goes in front of the current setting */ + lt_setenv (name, new_value); + XFREE (new_value); + } +} + +EOF + case $host_os in + mingw*) + cat <<"EOF" + +/* Prepares an argument vector before calling spawn(). + Note that spawn() does not by itself call the command interpreter + (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : + ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&v); + v.dwPlatformId == VER_PLATFORM_WIN32_NT; + }) ? "cmd.exe" : "command.com"). + Instead it simply concatenates the arguments, separated by ' ', and calls + CreateProcess(). We must quote the arguments since Win32 CreateProcess() + interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a + special way: + - Space and tab are interpreted as delimiters. They are not treated as + delimiters if they are surrounded by double quotes: "...". + - Unescaped double quotes are removed from the input. Their only effect is + that within double quotes, space and tab are treated like normal + characters. + - Backslashes not followed by double quotes are not special.
+ - But 2*n+1 backslashes followed by a double quote become + n backslashes followed by a double quote (n >= 0): + \" -> " + \\\" -> \" + \\\\\" -> \\" + */ +#define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" /* double quote, backslash, space and the control characters 001-037 */ +#define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" /* space and the control characters 001-037 */ +char ** +prepare_spawn (char **argv) +{ /* Builds a new vector with one entry per element of ARGV: entries that need quoting are fresh allocations, the remaining ones alias the ARGV strings. */ + size_t argc; + char **new_argv; + size_t i; + + /* Count number of arguments. */ + for (argc = 0; argv[argc] != NULL; argc++) + ; + + /* Allocate new argument vector. */ + new_argv = XMALLOC (char *, argc + 1); /* one extra slot beyond the argc entries */ + + /* Put quoted arguments into the new argument vector. */ + for (i = 0; i < argc; i++) + { + const char *string = argv[i]; +/* an empty argument is replaced by a pair of double quotes */ + if (string[0] == '\0') + new_argv[i] = xstrdup ("\"\""); + else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) + { + int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); /* surrounding quotes are added only when a space or control character is present */ + size_t length; + unsigned int backslashes; + const char *s; + char *quoted_string; + char *p; +/* Two passes over STRING follow. The first computes the length of the quoted form: every character counts once, a double quote additionally costs one backslash plus one per backslash directly before it, and surrounding quotes cost two characters plus one per trailing backslash. The second pass writes the quoted form into a buffer of exactly that size. */ + length = 0; + backslashes = 0; /* length of the backslash run ending at the previous character */ + if (quote_around) + length++; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + length += backslashes + 1; + length++; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + length += backslashes + 1; + + quoted_string = XMALLOC (char, length + 1); + + p = quoted_string; + backslashes = 0; + if (quote_around) + *p++ = '"'; + for (s = string; *s != '\0'; s++) + { + char c = *s; + if (c == '"') + { + unsigned int j; + for (j = backslashes + 1; j > 0; j--) + *p++ = '\\'; + } + *p++ = c; + if (c == '\\') + backslashes++; + else + backslashes = 0; + } + if (quote_around) + { + unsigned int j; + for (j = backslashes; j > 0; j--) + *p++ = '\\'; + *p++ = '"'; + } + *p = '\0'; + + new_argv[i] = quoted_string; + } + else + new_argv[i] = (char *) string; /* no special character: store the original pointer, no copy is made */ + } +
new_argv[argc] = NULL; + + return new_argv; +} +EOF + ;; + esac + + cat <<"EOF" +void lt_dump_script (FILE* f) +{ +EOF + func_emit_wrapper yes | + $SED -n -e ' +s/^\(.\{79\}\)\(..*\)/\1\ +\2/ +h +s/\([\\"]\)/\\\1/g +s/$/\\n/ +s/\([^\n]*\).*/ fputs ("\1", f);/p +g +D' + cat <<"EOF" +} +EOF +} +# end: func_emit_cwrapperexe_src + +# func_win32_import_lib_p ARG +# True if ARG is an import lib, as indicated by $file_magic_cmd +func_win32_import_lib_p () +{ + $debug_cmd + + case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in + *import*) : ;; + *) false ;; + esac +} + +# func_suncc_cstd_abi +# !!ONLY CALL THIS FOR SUN CC AFTER $compile_command IS FULLY EXPANDED!! +# Several compiler flags select an ABI that is incompatible with the +# Cstd library. Avoid specifying it if any are in CXXFLAGS. +func_suncc_cstd_abi () +{ + $debug_cmd + + case " $compile_command " in + *" -compat=g "*|*\ -std=c++[0-9][0-9]\ *|*" -library=stdcxx4 "*|*" -library=stlport4 "*) + suncc_use_cstd_abi=no + ;; + *) + suncc_use_cstd_abi=yes + ;; + esac +} + +# func_mode_link arg... +func_mode_link () +{ + $debug_cmd + + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + # It is impossible to link a dll without this setting, and + # we shouldn't force the makefile maintainer to figure out + # what system we are compiling for in order to pass an extra + # flag for every libtool invocation. + # allow_undefined=no + + # FIXME: Unfortunately, there are problems with the above when trying + # to make a dll that has undefined symbols, in which case not + # even a static library is built. For now, we need to specify + # -no-undefined on the libtool link line when we can be certain + # that all symbols are satisfied, otherwise we get a static library. 
+ allow_undefined=yes + ;; + *) + allow_undefined=yes + ;; + esac + libtool_args=$nonopt + base_compile="$nonopt $@" + compile_command=$nonopt + finalize_command=$nonopt + + compile_rpath= + finalize_rpath= + compile_shlibpath= + finalize_shlibpath= + convenience= + old_convenience= + deplibs= + old_deplibs= + compiler_flags= + linker_flags= + dllsearchpath= + lib_search_path=`pwd` + inst_prefix_dir= + new_inherited_linker_flags= + + avoid_version=no + bindir= + dlfiles= + dlprefiles= + dlself=no + export_dynamic=no + export_symbols= + export_symbols_regex= + generated= + libobjs= + ltlibs= + module=no + no_install=no + objs= + os2dllname= + non_pic_objects= + precious_files_regex= + prefer_static_libs=no + preload=false + prev= + prevarg= + release= + rpath= + xrpath= + perm_rpath= + temp_rpath= + thread_safe=no + vinfo= + vinfo_number=no + weak_libs= + single_module=$wl-single_module + func_infer_tag $base_compile + + # We need to know -static, to get the right output filenames. + for arg + do + case $arg in + -shared) + test yes != "$build_libtool_libs" \ + && func_fatal_configuration "cannot build a shared library" + build_old_libs=no + break + ;; + -all-static | -static | -static-libtool-libs) + case $arg in + -all-static) + if test yes = "$build_libtool_libs" && test -z "$link_static_flag"; then + func_warning "complete static linking is impossible in this configuration" + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + -static) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + ;; + -static-libtool-libs) + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + ;; + esac + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. 
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg=$1 + shift + func_quote_arg pretty,unquoted "$arg" + qarg=$func_quote_arg_unquoted_result + func_append libtool_args " $func_quote_arg_result" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + func_append compile_command " @OUTPUT@" + func_append finalize_command " @OUTPUT@" + ;; + esac + + case $prev in + bindir) + bindir=$arg + prev= + continue + ;; + dlfiles|dlprefiles) + $preload || { + # Add the symbol object into the linking commands. + func_append compile_command " @SYMFILE@" + func_append finalize_command " @SYMFILE@" + preload=: + } + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test no = "$dlself"; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test dlprefiles = "$prev"; then + dlself=yes + elif test dlfiles = "$prev" && test yes != "$dlopen_self"; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test dlfiles = "$prev"; then + func_append dlfiles " $arg" + else + func_append dlprefiles " $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols=$arg + test -f "$arg" \ + || func_fatal_error "symbol file '$arg' does not exist" + prev= + continue + ;; + expsyms_regex) + export_symbols_regex=$arg + prev= + continue + ;; + framework) + case $host in + *-*-darwin*) + case "$deplibs " in + *" $qarg.ltframework "*) ;; + *) func_append deplibs " $qarg.ltframework" # this is fixed later + ;; + esac + ;; + esac + prev= + continue + ;; + inst_prefix) + inst_prefix_dir=$arg + prev= + continue + ;; + mllvm) + # Clang does not use LLVM to link, so we can simply discard any + # '-mllvm $arg' options when doing the link step. 
+ prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat "$save_arg"` + do +# func_append moreargs " $fil" + arg=$fil + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + if test none != "$pic_object"; then + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + fi + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + done + else + func_fatal_error "link input file '$arg' does not exist" + fi + arg=$save_arg + prev= + continue + ;; + os2dllname) + os2dllname=$arg + prev= + continue + ;; + precious_regex) + precious_files_regex=$arg + prev= + continue + ;; + release) + release=-$arg + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + if test rpath = "$prev"; then + case "$rpath " in + *" $arg "*) ;; + *) func_append rpath " $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) func_append xrpath " $arg" ;; + esac + fi + prev= + continue + ;; + shrext) + shrext_cmds=$arg + prev= + continue + ;; + weak) + func_append weak_libs " $arg" + prev= + continue + ;; + xassembler) + func_append compiler_flags " -Xassembler $qarg" + prev= + func_append compile_command " -Xassembler $qarg" + func_append finalize_command " -Xassembler $qarg" + continue + ;; + xcclinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xcompiler) + func_append compiler_flags " $qarg" + prev= + func_append compile_command " $qarg" + func_append finalize_command " $qarg" + continue + ;; + xlinker) + func_append linker_flags " $qarg" + func_append compiler_flags " $wl$qarg" + prev= + func_append compile_command " $wl$qarg" + func_append finalize_command " $wl$qarg" + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg=$arg + + case $arg in + -all-static) + if 
test -n "$link_static_flag"; then + # See comment for -static flag below, for more details. + func_append compile_command " $link_static_flag" + func_append finalize_command " $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + func_fatal_error "'-allow-undefined' must not be used because it is the default" + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -bindir) + prev=bindir + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + func_fatal_error "more than one -exported-symbols argument is not allowed" + fi + if test X-export-symbols = "X$arg"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework) + prev=framework + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + func_append compile_command " $arg" + func_append finalize_command " $arg" + ;; + esac + continue + ;; + + -L*) + func_stripname "-L" '' "$arg" + if test -z "$func_stripname_result"; then + if test "$#" -gt 0; then + func_fatal_error "require no space between '-L' and '$1'" + else + func_fatal_error "need path for '-L' option" + fi + fi + func_resolve_sysroot "$func_stripname_result" + dir=$func_resolve_sysroot_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + test -z "$absdir" && \ + func_fatal_error "cannot determine absolute directory name of '$dir'" + dir=$absdir + ;; + esac + case "$deplibs " in + *" -L$dir "* | *" $arg "*) + # Will only happen for absolute or sysroot arguments + ;; + *) + # Preserve sysroot, but never include relative directories + case $dir in + [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; + *) func_append deplibs " -L$dir" ;; + esac + func_append lib_search_path " $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + ::) dllsearchpath=$dir;; + *) func_append dllsearchpath ":$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test X-lc = "X$arg" || test X-lm = "X$arg"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test X-lc = "X$arg" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) + # Do not include libc due to us having libc/libc_r. 
+ test X-lc = "X$arg" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + func_append deplibs " System.ltframework" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test X-lc = "X$arg" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test X-lc = "X$arg" && continue + ;; + esac + elif test X-lc_r = "X$arg"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + func_append deplibs " $arg" + continue + ;; + + -mllvm) + prev=mllvm + continue + ;; + + -module) + module=yes + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + # Darwin uses the -arch flag to determine output architecture. + -model|-arch|-isysroot|--sysroot) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + prev=xcompiler + continue + ;; + # Solaris ld rejects as of 11.4. Refer to Oracle bug 22985199. 
+ -pthread) + case $host in + *solaris2*) ;; + *) + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + ;; + esac + continue + ;; + -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + func_append compiler_flags " $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case "$new_inherited_linker_flags " in + *" $arg "*) ;; + * ) func_append new_inherited_linker_flags " $arg" ;; + esac + continue + ;; + + -multi_module) + single_module=$wl-multi_module + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) + # The PATH hackery in wrapper scripts is required on Windows + # and Darwin in order for the loader to find any dlls it needs. + func_warning "'-no-install' is ignored for $host" + func_warning "assuming '-no-fast-install' instead" + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -os2dllname) + prev=os2dllname + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + func_stripname '-R' '' "$arg" + dir=$func_stripname_result + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + =*) + func_stripname '=' '' "$dir" + dir=$lt_sysroot$func_stripname_result + ;; + *) + func_fatal_error "only absolute run-paths are allowed" + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + continue + ;; + + -shared) + # The effects of -shared are defined in a previous loop. 
+ continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -static | -static-libtool-libs) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -weak) + prev=weak + continue + ;; + + -Wc,*) + func_stripname '-Wc,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_arg pretty "$flag" + func_append arg " $func_quote_arg_result" + func_append compiler_flags " $func_quote_arg_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Wl,*) + func_stripname '-Wl,' '' "$arg" + args=$func_stripname_result + arg= + save_ifs=$IFS; IFS=, + for flag in $args; do + IFS=$save_ifs + func_quote_arg pretty "$flag" + func_append arg " $wl$func_quote_arg_result" + func_append compiler_flags " $wl$func_quote_arg_result" + func_append linker_flags " $func_quote_arg_result" + done + IFS=$save_ifs + func_stripname ' ' '' "$arg" + arg=$func_stripname_result + ;; + + -Xassembler) + prev=xassembler + continue + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # -msg_* for osf cc + -msg_*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + + # Flags to be passed through unchanged, with rationale: + # -64, -mips[0-9] enable 64-bit mode for the SGI compiler + # -r[0-9][0-9]* specify processor for the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler + # +DA*, +DD* enable 64-bit mode for the HP compiler + # -q* compiler args for the IBM 
compiler + # -m*, -t[45]*, -txscale* architecture-specific flags for GCC + # -F/path path to uninstalled frameworks, gcc on darwin + # -p, -pg, --coverage, -fprofile-* profiling flags for GCC + # -fstack-protector* stack protector flags for GCC + # @file GCC response files + # -tp=* Portland pgcc target processor selection + # --sysroot=* for sysroot support + # -O*, -g*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization + # -specs=* GCC specs files + # -stdlib=* select c++ std lib with clang + # -fsanitize=* Clang/GCC memory and address sanitizer + # -fuse-ld=* Linker select flags for GCC + # -static-* direct GCC to link specific libraries statically + # -fcilkplus Cilk Plus language extension features for C/C++ + # -Wa,* Pass flags directly to the assembler + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ + -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ + -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \ + -specs=*|-fsanitize=*|-fuse-ld=*|-static-*|-fcilkplus|-Wa,*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + func_append compile_command " $arg" + func_append finalize_command " $arg" + func_append compiler_flags " $arg" + continue + ;; + + -Z*) + if test os2 = "`expr $host : '.*\(os2\)'`"; then + # OS/2 uses -Zxxx to specify OS/2-specific options + compiler_flags="$compiler_flags $arg" + func_append compile_command " $arg" + func_append finalize_command " $arg" + case $arg in + -Zlinker | -Zstack) + prev=xcompiler + ;; + esac + continue + else + # Otherwise treat like 'Some other compiler flag' below + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + fi + ;; + + # Some other compiler flag. + -* | +*) + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + + *.$objext) + # A standard object. + func_append objs " $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. 
+ if func_lalib_unsafe_p "$arg"; then + pic_object= + non_pic_object= + + # Read the .lo file + func_source "$arg" + + if test -z "$pic_object" || + test -z "$non_pic_object" || + test none = "$pic_object" && + test none = "$non_pic_object"; then + func_fatal_error "cannot find name of object for '$arg'" + fi + + # Extract subdirectory from the argument. + func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + test none = "$pic_object" || { + # Prepend the subdirectory the object is found in. + pic_object=$xdir$pic_object + + if test dlfiles = "$prev"; then + if test yes = "$build_libtool_libs" && test yes = "$dlopen_support"; then + func_append dlfiles " $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test dlprefiles = "$prev"; then + # Preload the old-style object. + func_append dlprefiles " $pic_object" + prev= + fi + + # A PIC object. + func_append libobjs " $pic_object" + arg=$pic_object + } + + # Non-PIC object. + if test none != "$non_pic_object"; then + # Prepend the subdirectory the object is found in. + non_pic_object=$xdir$non_pic_object + + # A standard non-PIC object + func_append non_pic_objects " $non_pic_object" + if test -z "$pic_object" || test none = "$pic_object"; then + arg=$non_pic_object + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object=$pic_object + func_append non_pic_objects " $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if $opt_dry_run; then + # Extract subdirectory from the argument. 
+ func_dirname "$arg" "/" "" + xdir=$func_dirname_result + + func_lo2o "$arg" + pic_object=$xdir$objdir/$func_lo2o_result + non_pic_object=$xdir$func_lo2o_result + func_append libobjs " $pic_object" + func_append non_pic_objects " $non_pic_object" + else + func_fatal_error "'$arg' is not a valid libtool object" + fi + fi + ;; + + *.$libext) + # An archive. + func_append deplibs " $arg" + func_append old_deplibs " $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + func_resolve_sysroot "$arg" + if test dlfiles = "$prev"; then + # This library was specified with -dlopen. + func_append dlfiles " $func_resolve_sysroot_result" + prev= + elif test dlprefiles = "$prev"; then + # The library was specified with -dlpreopen. + func_append dlprefiles " $func_resolve_sysroot_result" + prev= + else + func_append deplibs " $func_resolve_sysroot_result" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + func_quote_arg pretty "$arg" + arg=$func_quote_arg_result + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
+ if test -n "$arg"; then + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + done # argument parsing loop + + test -n "$prev" && \ + func_fatal_help "the '$prevarg' option requires an argument" + + if test yes = "$export_dynamic" && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + func_append compile_command " $arg" + func_append finalize_command " $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + func_basename "$output" + outputname=$func_basename_result + libobjs_save=$libobjs + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$ECHO \"\$$shlibpath_var\" \| \$SED \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + # Definition is injected by LT_CONFIG during libtool generation. + func_munge_path_list sys_lib_dlsearch_path "$LT_SYS_LIBRARY_PATH" + + func_dirname "$output" "/" "" + output_objdir=$func_dirname_result$objdir + func_to_tool_file "$output_objdir/" + tool_output_objdir=$func_to_tool_file_result + # Create the object directory. + func_mkdir_p "$output_objdir" + + # Determine the type of output + case $output in + "") + func_fatal_help "you must specify an output file" + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. 
-la -lb -la) + for deplib in $deplibs; do + if $opt_preserve_dup_deps; then + case "$libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append libs " $deplib" + done + + if test lib = "$linkmode"; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). + pre_post_deps= + if $opt_duplicate_compiler_generated_deps; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; + esac + func_append pre_post_deps " $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + notinst_path= # paths that contain not-installed libtool libraries + + case $linkmode in + lib) + passes="conv dlpreopen link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + func_fatal_help "libraries can '-dlopen' only libtool libraries: $file" + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=false + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + + for pass in $passes; do + # The preopen pass in lib mode reverses $deplibs; put it back here + # so that -L comes before libs that need it for instance... 
+ if test lib,link = "$linkmode,$pass"; then + ## FIXME: Find the place where the list is rebuilt in the wrong + ## order, and fix it there properly + tmp_deplibs= + for deplib in $deplibs; do + tmp_deplibs="$deplib $tmp_deplibs" + done + deplibs=$tmp_deplibs + fi + + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass"; then + libs=$deplibs + deplibs= + fi + if test prog = "$linkmode"; then + case $pass in + dlopen) libs=$dlfiles ;; + dlpreopen) libs=$dlprefiles ;; + link) + libs="$deplibs %DEPLIBS%" + test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" + ;; + esac + fi + if test lib,dlpreopen = "$linkmode,$pass"; then + # Collect and forward deplibs of preopened libtool libs + for lib in $dlprefiles; do + # Ignore non-libtool-libs + dependency_libs= + func_resolve_sysroot "$lib" + case $lib in + *.la) func_source "$func_resolve_sysroot_result" ;; + esac + + # Collect preopened libtool deplibs, except any this library + # has declared as weak libs + for deplib in $dependency_libs; do + func_basename "$deplib" + deplib_base=$func_basename_result + case " $weak_libs " in + *" $deplib_base "*) ;; + *) func_append deplibs " $deplib" ;; + esac + done + done + libs=$dlprefiles + fi + if test dlopen = "$pass"; then + # Collect dlpreopened libraries + save_deplibs=$deplibs + deplibs= + fi + + for deplib in $libs; do + lib= + found=false + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ + |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append compiler_flags " $deplib" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -l*) + if test lib != "$linkmode" && test prog != "$linkmode"; then + func_warning "'-l' is 
ignored for archives/objects" + continue + fi + func_stripname '-l' '' "$deplib" + name=$func_stripname_result + if test lib = "$linkmode"; then + searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" + else + searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" + fi + for searchdir in $searchdirs; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib=$searchdir/lib$name$search_ext + if test -f "$lib"; then + if test .la = "$search_ext"; then + found=: + else + found=false + fi + break 2 + fi + done + done + if $found; then + # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $deplib "*) + if func_lalib_p "$lib"; then + library_names= + old_library= + func_source "$lib" + for l in $old_library $library_names; do + ll=$l + done + if test "X$ll" = "X$old_library"; then # only static version available + found=false + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + lib=$ladir/$old_library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + else + # deplib doesn't seem to be a libtool library + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test lib = "$linkmode" && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + ;; # -l + *.ltframework) + if test prog,link = "$linkmode,$pass"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + if test lib = "$linkmode"; then + case "$new_inherited_linker_flags " in + *" $deplib "*) ;; + * ) func_append new_inherited_linker_flags " $deplib" ;; + esac + fi + fi + continue + ;; + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test conv = "$pass" && continue + newdependency_libs="$deplib $newdependency_libs" + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + prog) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + if test scan = "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + *) + func_warning "'-L' is ignored for archives/objects" + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test link = "$pass"; then + func_stripname '-R' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + 
dir=$func_resolve_sysroot_result + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) func_append xrpath " $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) + func_resolve_sysroot "$deplib" + lib=$func_resolve_sysroot_result + ;; + *.$libext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + # Linking convenience modules into shared libraries is allowed, + # but linking other static libraries is non-portable. + case " $dlpreconveniencelibs " in + *" $deplib "*) ;; + *) + valid_a_lib=false + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=: + fi + ;; + pass_all) + valid_a_lib=: + ;; + esac + if $valid_a_lib; then + echo + $ECHO "*** Warning: Linking the shared library $output against the" + $ECHO "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + else + echo + $ECHO "*** Warning: Trying to link with static lib archive $deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because the file extensions .$libext of this argument makes me believe" + echo "*** that it is just a static archive that I should not use here." 
+ fi + ;; + esac + continue + ;; + prog) + if test link != "$pass"; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test conv = "$pass"; then + deplibs="$deplib $deplibs" + elif test prog = "$linkmode"; then + if test dlpreopen = "$pass" || test yes != "$dlopen_support" || test no = "$build_libtool_libs"; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + func_append newdlprefiles " $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + func_append newdlfiles " $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=: + continue + ;; + esac # case $deplib + + $found || test -f "$lib" \ + || func_fatal_error "cannot find the library '$lib' or unhandled argument '$deplib'" + + # Check to see that this really is a libtool archive. + func_lalib_unsafe_p "$lib" \ + || func_fatal_error "'$lib' is not a valid libtool archive" + + func_dirname "$lib" "" "." 
+ ladir=$func_dirname_result + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + inherited_linker_flags= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + func_source "$lib" + + # Convert "-framework foo" to "foo.ltframework" + if test -n "$inherited_linker_flags"; then + tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` + for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do + case " $new_inherited_linker_flags " in + *" $tmp_inherited_linker_flag "*) ;; + *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; + esac + done + fi + dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + if test lib,link = "$linkmode,$pass" || + test prog,scan = "$linkmode,$pass" || + { test prog != "$linkmode" && test lib != "$linkmode"; }; then + test -n "$dlopen" && func_append dlfiles " $dlopen" + test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" + fi + + if test conv = "$pass"; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + # It is a libtool convenience library, so add in its objects. 
+ func_append convenience " $ladir/$objdir/$old_library" + func_append old_convenience " $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done + elif test prog != "$linkmode" && test lib != "$linkmode"; then + func_fatal_error "'$lib' is not a convenience library" + fi + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + if test -n "$old_library" && + { test yes = "$prefer_static_libs" || + test built,no = "$prefer_static_libs,$installed"; }; then + linklib=$old_library + else + for l in $old_library $library_names; do + linklib=$l + done + fi + if test -z "$linklib"; then + func_fatal_error "cannot find name of link library for '$lib'" + fi + + # This library was specified with -dlopen. + if test dlopen = "$pass"; then + test -z "$libdir" \ + && func_fatal_error "cannot -dlopen a convenience library: '$lib'" + if test -z "$dlname" || + test yes != "$dlopen_support" || + test no = "$build_libtool_libs" + then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + func_append dlprefiles " $lib $dependency_libs" + else + func_append newdlfiles " $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. 
+ case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir=$ladir ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + func_warning "cannot determine absolute directory name of '$ladir'" + func_warning "passing it literally to the linker, although it might fail" + abs_ladir=$ladir + fi + ;; + esac + func_basename "$lib" + laname=$func_basename_result + + # Find the relevant object directory and library name. + if test yes = "$installed"; then + if test ! -f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + func_warning "library '$lib' was moved." + dir=$ladir + absdir=$abs_ladir + libdir=$abs_ladir + else + dir=$lt_sysroot$libdir + absdir=$lt_sysroot$libdir + fi + test yes = "$hardcode_automatic" && avoidtemprpath=yes + else + if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir=$ladir + absdir=$abs_ladir + # Remove this search path later + func_append notinst_path " $abs_ladir" + else + dir=$ladir/$objdir + absdir=$abs_ladir/$objdir + # Remove this search path later + func_append notinst_path " $abs_ladir" + fi + fi # $installed = yes + func_stripname 'lib' '.la' "$laname" + name=$func_stripname_result + + # This library was specified with -dlpreopen. + if test dlpreopen = "$pass"; then + if test -z "$libdir" && test prog = "$linkmode"; then + func_fatal_error "only libraries may -dlpreopen a convenience library: '$lib'" + fi + case $host in + # special handling for platforms with PE-DLLs. + *cygwin* | *mingw* | *cegcc* ) + # Linker will automatically link against shared library if both + # static and shared are present. Therefore, ensure we extract + # symbols from the import library if a shared library is present + # (otherwise, the dlopen module name will be incorrect). We do + # this by putting the import library name into $newdlprefiles. 
+ # We recover the dlopen module name by 'saving' the la file + # name in a special purpose variable, and (later) extracting the + # dlname from the la file. + if test -n "$dlname"; then + func_tr_sh "$dir/$linklib" + eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" + func_append newdlprefiles " $dir/$linklib" + else + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + fi + ;; + * ) + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + func_append newdlprefiles " $dir/$old_library" + # Keep a list of preopened convenience libraries to check + # that they are being used correctly in the link pass. + test -z "$libdir" && \ + func_append dlpreconveniencelibs " $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. 
+ elif test -n "$dlname"; then + func_append newdlprefiles " $dir/$dlname" + else + func_append newdlprefiles " $dir/$linklib" + fi + ;; + esac + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test lib = "$linkmode"; then + deplibs="$dir/$old_library $deplibs" + elif test prog,link = "$linkmode,$pass"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test prog = "$linkmode" && test link != "$pass"; then + func_append newlib_search_path " $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=false + if test no != "$link_all_deplibs" || test -z "$library_names" || + test no = "$build_libtool_libs"; then + linkalldeplibs=: + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result" + func_append newlib_search_path " $func_resolve_sysroot_result" + ;; + esac + # Need to link against all dependency_libs? + if $linkalldeplibs; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $deplib "*) func_append specialdeplibs " $deplib" ;; + esac + fi + func_append tmp_libs " $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test prog,link = "$linkmode,$pass"; then + if test -n "$library_names" && + { { test no = "$prefer_static_libs" || + test built,yes = "$prefer_static_libs,$installed"; } || + test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath"; then + # Make sure the rpath contains only unique directories. 
+ case $temp_rpath: in + *"$absdir:"*) ;; + *) func_append temp_rpath "$absdir:" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi # $linkmode,$pass = prog,link... + + if $alldeplibs && + { test pass_all = "$deplibs_check_method" || + { test yes = "$build_libtool_libs" && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test built = "$use_static_libs" && test yes = "$installed"; then + use_static_libs=no + fi + if test -n "$library_names" && + { test no = "$use_static_libs" || test -z "$old_library"; }; then + case $host in + *cygwin* | *mingw* | *cegcc* | *os2*) + # No point in relinking DLLs because paths are not encoded + func_append notinst_deplibs " $lib" + need_relink=no + ;; + *) + if test no = "$installed"; then + func_append notinst_deplibs " $lib" + need_relink=yes + fi + ;; + esac + # This is a shared library + + # Warn about portability, can't link against -module's on some + # systems (darwin). Don't bleat about dlopened modules though! 
+ dlopenmodule= + for dlpremoduletest in $dlprefiles; do + if test "X$dlpremoduletest" = "X$lib"; then + dlopenmodule=$dlpremoduletest + break + fi + done + if test -z "$dlopenmodule" && test yes = "$shouldnotlink" && test link = "$pass"; then + echo + if test prog = "$linkmode"; then + $ECHO "*** Warning: Linking the executable $output against the loadable module" + else + $ECHO "*** Warning: Linking the shared library $output against the loadable module" + fi + $ECHO "*** $linklib is not portable!" + fi + if test lib = "$linkmode" && + test yes = "$hardcode_into_libs"; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) func_append compile_rpath " $absdir" ;; + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + shift + realname=$1 + shift + libname=`eval "\\$ECHO \"$libname_spec\""` + # use dlname if we got it. it's perfectly good, no? 
+ if test -n "$dlname"; then + soname=$dlname + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw* | *cegcc* | *os2*) + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + esac + eval soname=\"$soname_spec\" + else + soname=$realname + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot=$soname + func_basename "$soroot" + soname=$func_basename_result + func_stripname 'lib' '.dll' "$soname" + newlib=libimp-$func_stripname_result.a + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + func_verbose "extracting exported symbol list from '$soname'" + func_execute_cmds "$extract_expsyms_cmds" 'exit $?' + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + func_verbose "generating import library for '$soname'" + func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test prog = "$linkmode" || test relink != "$opt_mode"; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test no = "$hardcode_direct"; then + add=$dir/$linklib + case $host in + *-*-sco3.2v5.0.[024]*) add_dir=-L$dir ;; + *-*-sysv4*uw2*) add_dir=-L$dir ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir=-L$dir ;; + *-*-darwin* ) + # if the lib is a (non-dlopened) module then we cannot + # link against it, someone is ignoring the earlier warnings + if /usr/bin/file -L $add 2> /dev/null | + $GREP ": [^:]* bundle" >/dev/null; then + if test "X$dlopenmodule" != "X$lib"; then + $ECHO "*** Warning: lib $linklib is a module, not a shared library" + if test -z "$old_library"; then + echo + echo "*** And there doesn't seem to be a static archive available" + echo "*** The link 
will probably fail, sorry" + else + add=$dir/$old_library + fi + elif test -n "$old_library"; then + add=$dir/$old_library + fi + fi + esac + elif test no = "$hardcode_minus_L"; then + case $host in + *-*-sunos*) add_shlibpath=$dir ;; + esac + add_dir=-L$dir + add=-l$name + elif test no = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + relink) + if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$dir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$absdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + add_shlibpath=$dir + add=-l$name + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test yes != "$lib_linked"; then + func_fatal_configuration "unsupported hardcode properties" + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) func_append compile_shlibpath "$add_shlibpath:" ;; + esac + fi + if test prog = "$linkmode"; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test yes != "$hardcode_direct" && + test yes != "$hardcode_minus_L" && + test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + fi + fi + fi + + if test prog = "$linkmode" || test relink = "$opt_mode"; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. 
+ if test yes = "$hardcode_direct" && + test no = "$hardcode_direct_absolute"; then + add=$libdir/$linklib + elif test yes = "$hardcode_minus_L"; then + add_dir=-L$libdir + add=-l$name + elif test yes = "$hardcode_shlibpath_var"; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) func_append finalize_shlibpath "$libdir:" ;; + esac + add=-l$name + elif test yes = "$hardcode_automatic"; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib"; then + add=$inst_prefix_dir$libdir/$linklib + else + add=$libdir/$linklib + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir=-L$libdir + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + func_append add_dir " -L$inst_prefix_dir$libdir" + ;; + esac + fi + add=-l$name + fi + + if test prog = "$linkmode"; then + test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" + test -n "$add" && finalize_deplibs="$add $finalize_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + fi + fi + elif test prog = "$linkmode"; then + # Here we assume that one of hardcode_direct or hardcode_minus_L + # is not unsupported. This is valid on all known static and + # shared platforms. + if test unsupported != "$hardcode_direct"; then + test -n "$old_library" && linklib=$old_library + compile_deplibs="$dir/$linklib $compile_deplibs" + finalize_deplibs="$dir/$linklib $finalize_deplibs" + else + compile_deplibs="-l$name -L$dir $compile_deplibs" + finalize_deplibs="-l$name -L$dir $finalize_deplibs" + fi + elif test yes = "$build_libtool_libs"; then + # Not a shared library + if test pass_all != "$deplibs_check_method"; then + # We're trying link a shared library against a static one + # but the system doesn't support it. 
+ + # Just print a warning and add the library to dependency_libs so + # that the program can be linked against the static library. + echo + $ECHO "*** Warning: This system cannot link to static lib archive $lib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have." + if test yes = "$module"; then + echo "*** But as you try to build a module library, libtool will still create " + echo "*** a static module, that should work as long as the dlopening application" + echo "*** is linked with the -dlopen flag to resolve symbols at runtime." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." + fi + if test no = "$build_old_libs"; then + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + else + deplibs="$dir/$old_library $deplibs" + link_static=yes + fi + fi # link shared/static library? 
+ + if test lib = "$linkmode"; then + if test -n "$dependency_libs" && + { test yes != "$hardcode_into_libs" || + test yes = "$build_old_libs" || + test yes = "$link_static"; }; then + # Extract -R from dependency_libs + temp_deplibs= + for libdir in $dependency_libs; do + case $libdir in + -R*) func_stripname '-R' '' "$libdir" + temp_xrpath=$func_stripname_result + case " $xrpath " in + *" $temp_xrpath "*) ;; + *) func_append xrpath " $temp_xrpath";; + esac;; + *) func_append temp_deplibs " $libdir";; + esac + done + dependency_libs=$temp_deplibs + fi + + func_append newlib_search_path " $absdir" + # Link against this library + test no = "$link_static" && newdependency_libs="$abs_ladir/$laname $newdependency_libs" + # ... and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + case $deplib in + -L*) func_stripname '-L' '' "$deplib" + func_resolve_sysroot "$func_stripname_result";; + *) func_resolve_sysroot "$deplib" ;; + esac + if $opt_preserve_dup_deps; then + case "$tmp_libs " in + *" $func_resolve_sysroot_result "*) + func_append specialdeplibs " $func_resolve_sysroot_result" ;; + esac + fi + func_append tmp_libs " $func_resolve_sysroot_result" + done + + if test no != "$link_all_deplibs"; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + path= + case $deplib in + -L*) path=$deplib ;; + *.la) + func_resolve_sysroot "$deplib" + deplib=$func_resolve_sysroot_result + func_dirname "$deplib" "" "." + dir=$func_dirname_result + # We need an absolute path. 
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir=$dir ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + func_warning "cannot determine absolute directory name of '$dir'" + absdir=$dir + fi + ;; + esac + if $GREP "^installed=no" $deplib > /dev/null; then + case $host in + *-*-darwin*) + depdepl= + eval deplibrary_names=`$SED -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names"; then + for tmp in $deplibrary_names; do + depdepl=$tmp + done + if test -f "$absdir/$objdir/$depdepl"; then + depdepl=$absdir/$objdir/$depdepl + darwin_install_name=`$OTOOL -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + if test -z "$darwin_install_name"; then + darwin_install_name=`$OTOOL64 -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` + fi + func_append compiler_flags " $wl-dylib_file $wl$darwin_install_name:$depdepl" + func_append linker_flags " -dylib_file $darwin_install_name:$depdepl" + path= + fi + fi + ;; + *) + path=-L$absdir/$objdir + ;; + esac + else + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + test "$absdir" != "$libdir" && \ + func_warning "'$deplib' seems to be moved" + + path=-L$absdir + fi + ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + if test link = "$pass"; then + if test prog = "$linkmode"; then + compile_deplibs="$new_inherited_linker_flags $compile_deplibs" + finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" + else + compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + fi + fi + dependency_libs=$newdependency_libs + if test dlpreopen = "$pass"; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test 
dlopen != "$pass"; then + test conv = "$pass" || { + # Make sure lib_search_path contains only unique directories. + lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) func_append lib_search_path " $dir" ;; + esac + done + newlib_search_path= + } + + if test prog,link = "$linkmode,$pass"; then + vars="compile_deplibs finalize_deplibs" + else + vars=deplibs + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. 
+ case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) func_append tmp_libs " $deplib" ;; + esac + ;; + *) func_append tmp_libs " $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + + # Add Sun CC postdeps if required: + test CXX = "$tagname" && { + case $host_os in + linux*) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C++ 5.9 + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + + solaris*) + func_cc_basename "$CC" + case $func_cc_basename_result in + CC* | sunCC*) + func_suncc_cstd_abi + + if test no != "$suncc_use_cstd_abi"; then + func_append postdeps ' -library=Cstd -library=Crun' + fi + ;; + esac + ;; + esac + } + + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i= + ;; + esac + if test -n "$i"; then + func_append tmp_libs " $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test prog = "$linkmode"; then + dlfiles=$newdlfiles + fi + if test prog = "$linkmode" || test lib = "$linkmode"; then + dlprefiles=$newdlprefiles + fi + + case $linkmode in + oldlib) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for archives" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for archives" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for archives" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for archives" + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for 
archives" + + test -n "$release" && \ + func_warning "'-release' is ignored for archives" + + test -n "$export_symbols$export_symbols_regex" && \ + func_warning "'-export-symbols' is ignored for archives" + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs=$output + func_append objs "$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form 'libNAME.la'. + case $outputname in + lib*) + func_stripname 'lib' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + test no = "$module" \ + && func_fatal_help "libtool library '$output' must begin with 'lib'" + + if test no != "$need_lib_prefix"; then + # Add the "lib" prefix for modules if required + func_stripname '' '.la' "$outputname" + name=$func_stripname_result + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + func_stripname '' '.la' "$outputname" + libname=$func_stripname_result + fi + ;; + esac + + if test -n "$objs"; then + if test pass_all != "$deplibs_check_method"; then + func_fatal_error "cannot build libtool library '$output' from non-libtool objects on this host:$objs" + else + echo + $ECHO "*** Warning: Linking the shared library $output against the non-libtool" + $ECHO "*** objects $objs is not portable!" + func_append libobjs " $objs" + fi + fi + + test no = "$dlself" \ + || func_warning "'-dlopen self' is ignored for libtool libraries" + + set dummy $rpath + shift + test 1 -lt "$#" \ + && func_warning "ignoring multiple '-rpath's for a libtool library" + + install_libdir=$1 + + oldlibs= + if test -z "$rpath"; then + if test yes = "$build_libtool_libs"; then + # Building a libtool convenience library. + # Some compilers have problems with a '.al' extension so + # convenience libraries should have the same extension an + # archive normally would. 
+ oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + test -n "$vinfo" && \ + func_warning "'-version-info/-version-number' is ignored for convenience libraries" + + test -n "$release" && \ + func_warning "'-release' is ignored for convenience libraries" + else + + # Parse the version information argument. + save_ifs=$IFS; IFS=: + set dummy $vinfo 0 0 0 + shift + IFS=$save_ifs + + test -n "$7" && \ + func_fatal_help "too many parameters to '-version-info'" + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major=$1 + number_minor=$2 + number_revision=$3 + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # that has an extra 1 added just for fun + # + case $version_type in + # correct linux to gnu/linux during the next big refactor + darwin|freebsd-elf|linux|midnightbsd-elf|osf|windows|none) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_revision + ;; + freebsd-aout|qnx|sunos) + current=$number_major + revision=$number_minor + age=0 + ;; + irix|nonstopux) + func_arith $number_major + $number_minor + current=$func_arith_result + age=$number_minor + revision=$number_minor + lt_irix_increment=no + ;; + *) + func_fatal_configuration "$modename: unknown library version type '$version_type'" + ;; + esac + ;; + no) + current=$1 + revision=$2 + age=$3 + ;; + esac + + # Check that each of the things are valid numbers. 
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "CURRENT '$current' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "REVISION '$revision' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + func_error "AGE '$age' must be a nonnegative integer" + func_fatal_error "'$vinfo' is not valid version information" + ;; + esac + + if test "$age" -gt "$current"; then + func_error "AGE '$age' is greater than the current interface number '$current'" + func_fatal_error "'$vinfo' is not valid version information" + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + # Darwin ld doesn't like 0 for these options... 
+ func_arith $current + 1 + minor_current=$func_arith_result + xlcverstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + # On Darwin other compilers + case $CC in + nagfor*) + verstring="$wl-compatibility_version $wl$minor_current $wl-current_version $wl$minor_current.$revision" + ;; + *) + verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" + ;; + esac + ;; + + freebsd-aout) + major=.$current + versuffix=.$current.$revision + ;; + + freebsd-elf | midnightbsd-elf) + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + irix | nonstopux) + if test no = "$lt_irix_increment"; then + func_arith $current - $age + else + func_arith $current - $age + 1 + fi + major=$func_arith_result + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring=$verstring_prefix$major.$revision + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test 0 -ne "$loop"; do + func_arith $revision - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring_prefix$major.$iface:$verstring + done + + # Before this point, $major must not contain '.'. + major=.$major + versuffix=$major.$revision + ;; + + linux) # correct to gnu/linux during the next big refactor + func_arith $current - $age + major=.$func_arith_result + versuffix=$major.$age.$revision + ;; + + osf) + func_arith $current - $age + major=.$func_arith_result + versuffix=.$current.$age.$revision + verstring=$current.$age.$revision + + # Add in all the interfaces that we are compatible with. 
+ loop=$age + while test 0 -ne "$loop"; do + func_arith $current - $loop + iface=$func_arith_result + func_arith $loop - 1 + loop=$func_arith_result + verstring=$verstring:$iface.0 + done + + # Make executables depend on our current version. + func_append verstring ":$current.0" + ;; + + qnx) + major=.$current + versuffix=.$current + ;; + + sco) + major=.$current + versuffix=.$current + ;; + + sunos) + major=.$current + versuffix=.$current.$revision + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 file systems. + func_arith $current - $age + major=$func_arith_result + versuffix=-$major + ;; + + *) + func_fatal_configuration "unknown library version type '$version_type'" + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. + if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring=0.0 + ;; + esac + if test no = "$need_version"; then + versuffix= + else + versuffix=.0.0 + fi + fi + + # Remove version info from name if versioning should be avoided + if test yes,no = "$avoid_version,$need_version"; then + major= + versuffix= + verstring= + fi + + # Check to see if the archive will have undefined symbols. + if test yes = "$allow_undefined"; then + if test unsupported = "$allow_undefined_flag"; then + if test yes = "$build_old_libs"; then + func_warning "undefined symbols not allowed in $host shared libraries; building static only" + build_libtool_libs=no + else + func_fatal_error "can't build $host shared library unless -no-undefined is specified" + fi + fi + else + # Don't allow undefined symbols. 
+ allow_undefined_flag=$no_undefined_flag + fi + + fi + + func_generate_dlsyms "$libname" "$libname" : + func_append libobjs " $symfileobj" + test " " = "$libobjs" && libobjs= + + if test relink != "$opt_mode"; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$ECHO "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext | *.gcno) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/$libname$release.*) + if test -n "$precious_files_regex"; then + if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + func_append removelist " $p" + ;; + *) ;; + esac + done + test -n "$removelist" && \ + func_show_eval "${RM}r \$removelist" + fi + + # Now set the variables for building old libraries. + if test yes = "$build_old_libs" && test convenience != "$build_libtool_libs"; then + func_append oldlibs " $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; $lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + #for path in $notinst_path; do + # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` + # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` + # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` + #done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. 
+ temp_xrpath= + for libdir in $xrpath; do + func_replace_sysroot "$libdir" + func_append temp_xrpath " -R$func_replace_sysroot_result" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + if test yes != "$hardcode_into_libs" || test yes = "$build_old_libs"; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles=$dlfiles + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) func_append dlfiles " $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles=$dlprefiles + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) func_append dlprefiles " $lib" ;; + esac + done + + if test yes = "$build_libtool_libs"; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + func_append deplibs " System.ltframework" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test yes = "$build_libtool_need_lc"; then + func_append deplibs " -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. 
+ name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release= + versuffix= + major= + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $opt_dry_run || $RM conftest.c + cat > conftest.c </dev/null` + $nocaseglob + else + potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` + fi + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null | + $GREP " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? 
+ potlib=$potent_lib + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | $SED 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib=$potliblink;; + *) potlib=`$ECHO "$potlib" | $SED 's|[^/]*$||'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | + $SED -e 10q | + $EGREP "$file_magic_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + $ECHO "*** with $libname but no candidates were found. (...for file magic test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a file magic. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + match_pattern*) + set dummy $deplibs_check_method; shift + match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` + for a_deplib in $deplibs; do + case $a_deplib in + -l*) + func_stripname -l '' "$a_deplib" + name=$func_stripname_result + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + case " $predeps $postdeps " in + *" $a_deplib "*) + func_append newdeplibs " $a_deplib" + a_deplib= + ;; + esac + fi + if test -n "$a_deplib"; then + libname=`eval "\\$ECHO \"$libname_spec\""` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib=$potent_lib # see symlink-check above in file_magic test + if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ + $EGREP "$match_pattern_regex" > /dev/null; then + func_append newdeplibs " $a_deplib" + a_deplib= + break 2 + fi + done + done + fi + if test -n "$a_deplib"; then + droppeddeps=yes + echo + $ECHO "*** Warning: linker path does not have real file for library $a_deplib." + echo "*** I have the capability to make that library automatically link in when" + echo "*** you link to this library. But I can only do this if you have a" + echo "*** shared version of the library, which you do not appear to have" + echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib"; then + $ECHO "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $ECHO "*** with $libname and none of the candidates passed a file format test" + $ECHO "*** using a regex pattern. Last file checked: $potlib" + fi + fi + ;; + *) + # Add a -L argument. + func_append newdeplibs " $a_deplib" + ;; + esac + done # Gone through all deplibs. 
+ ;; + none | unknown | *) + newdeplibs= + tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` + if test yes = "$allow_libtool_libs_with_static_runtimes"; then + for i in $predeps $postdeps; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s|$i||"` + done + fi + case $tmp_deplibs in + *[!\ \ ]*) + echo + if test none = "$deplibs_check_method"; then + echo "*** Warning: inter-library dependencies are not supported in this platform." + else + echo "*** Warning: inter-library dependencies are not known to be supported." + fi + echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + ;; + esac + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library with the System framework + newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + if test yes = "$droppeddeps"; then + if test yes = "$module"; then + echo + echo "*** Warning: libtool could not satisfy all declared inter-library" + $ECHO "*** dependencies of module $libname. Therefore, libtool will create" + echo "*** a static module, that should work as long as the dlopening" + echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + echo + echo "*** However, this would only work if libtool was able to extract symbol" + echo "*** lists from a program, using 'nm' or equivalent, but libtool could" + echo "*** not find such a program. So, this module is probably useless." + echo "*** 'nm' from GNU binutils and a full rebuild may help." 
+ fi + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + echo "*** The inter-library dependencies that have been dropped here will be" + echo "*** automatically added whenever a program is linked with this library" + echo "*** or is declared to -dlopen it." + + if test no = "$allow_undefined"; then + echo + echo "*** Since this library must not contain undefined symbols," + echo "*** because either the platform does not support them or" + echo "*** it was explicitly requested with -no-undefined," + echo "*** libtool will only create a static version of it." + if test no = "$build_old_libs"; then + oldlibs=$output_objdir/$libname.$libext + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + case $host in + *-*-darwin*) + newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + deplibs=$new_libs + + # All the library-specific variables (install_libdir is set above). 
+ library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test yes = "$build_libtool_libs"; then + # Remove $wl instances when linking with ld. + # FIXME: should test the right _cmds variable. + case $archive_cmds in + *\$LD\ *) wl= ;; + esac + if test yes = "$hardcode_into_libs"; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath=$finalize_rpath + test relink = "$opt_mode" || rpath=$compile_rpath$rpath + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + func_replace_sysroot "$libdir" + libdir=$func_replace_sysroot_result + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append dep_rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. 
+ rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath=$finalize_shlibpath + test relink = "$opt_mode" || shlibpath=$compile_shlibpath$shlibpath + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. + eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + shift + realname=$1 + shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname=$realname + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib=$output_objdir/$realname + linknames= + for link + do + func_append linknames " $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` + test "X$libobjs" = "X " && libobjs= + + delfiles= + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" + export_symbols=$output_objdir/$libname.uexp + func_append delfiles " $export_symbols" + fi + + orig_export_symbols= + case $host_os in + cygwin* | mingw* | cegcc*) + if test -n "$export_symbols" && test -z "$export_symbols_regex"; then + # exporting using user supplied symfile + func_dll_def_p "$export_symbols" || { + # and it's NOT already a .def file. Must figure out + # which of the given symbols are data symbols and tag + # them as such. So, trigger use of export_symbols_cmds. + # export_symbols gets reassigned inside the "prepare + # the list of exported symbols" if statement, so the + # include_expsyms logic still works. 
+ orig_export_symbols=$export_symbols + export_symbols= + always_export_symbols=yes + } + fi + ;; + esac + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test yes = "$always_export_symbols" || test -n "$export_symbols_regex"; then + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + cmds=$export_symbols_cmds + save_ifs=$IFS; IFS='~' + for cmd1 in $cmds; do + IFS=$save_ifs + # Take the normal branch if the nm_file_list_spec branch + # doesn't work or if tool conversion is not needed. + case $nm_file_list_spec~$to_tool_file_cmd in + *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) + try_normal_branch=yes + eval cmd=\"$cmd1\" + func_len " $cmd" + len=$func_len_result + ;; + *) + try_normal_branch=no + ;; + esac + if test yes = "$try_normal_branch" \ + && { test "$len" -lt "$max_cmd_len" \ + || test "$max_cmd_len" -le -1; } + then + func_show_eval "$cmd" 'exit $?' + skipped_export=false + elif test -n "$nm_file_list_spec"; then + func_basename "$output" + output_la=$func_basename_result + save_libobjs=$libobjs + save_output=$output + output=$output_objdir/$output_la.nm + func_to_tool_file "$output" + libobjs=$nm_file_list_spec$func_to_tool_file_result + func_append delfiles " $output" + func_verbose "creating $NM input file list: $output" + for obj in $save_libobjs; do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > "$output" + eval cmd=\"$cmd1\" + func_show_eval "$cmd" 'exit $?' + output=$save_output + libobjs=$save_libobjs + skipped_export=false + else + # The command line is too long to execute in one step. + func_verbose "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. 
+ break + fi + done + IFS=$save_ifs + if test -n "$export_symbols_regex" && test : != "$skipped_export"; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test : != "$skipped_export" && test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + func_append tmp_deplibs " $test_deplib" + ;; + esac + done + deplibs=$tmp_deplibs + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec" && + test yes = "$compiler_needs_object" && + test -z "$libobjs"; then + # extract the archives, so we have objects to list. + # TODO: could optimize this to just extract one archive. 
+ whole_archive_flag_spec= + fi + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + else + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + fi + + if test yes = "$thread_safe" && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + func_append linker_flags " $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test : != "$skipped_export" && + func_len " $test_cmds" && + len=$func_len_result && + test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise + # or, if using GNU ld and skipped_export is not :, use a linker + # script. + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. 
+ # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + func_basename "$output" + output_la=$func_basename_result + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + last_robj= + k=1 + + if test -n "$save_libobjs" && test : != "$skipped_export" && test yes = "$with_gnu_ld"; then + output=$output_objdir/$output_la.lnkscript + func_verbose "creating GNU ld script: $output" + echo 'INPUT (' > $output + for obj in $save_libobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + echo ')' >> $output + func_append delfiles " $output" + func_to_tool_file "$output" + output=$func_to_tool_file_result + elif test -n "$save_libobjs" && test : != "$skipped_export" && test -n "$file_list_spec"; then + output=$output_objdir/$output_la.lnk + func_verbose "creating linker input file list: $output" + : > $output + set x $save_libobjs + shift + firstobj= + if test yes = "$compiler_needs_object"; then + firstobj="$1 " + shift + fi + for obj + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" >> $output + done + func_append delfiles " $output" + func_to_tool_file "$output" + output=$firstobj\"$file_list_spec$func_to_tool_file_result\" + else + if test -n "$save_libobjs"; then + func_verbose "creating reloadable object files..." + output=$output_objdir/$output_la-$k.$objext + eval test_cmds=\"$reload_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + + # Loop over the list of objects to be linked. 
+ for obj in $save_libobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + if test -z "$objlist" || + test "$len" -lt "$max_cmd_len"; then + func_append objlist " $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test 1 -eq "$k"; then + # The first file doesn't have a previous command to add. + reload_objs=$objlist + eval concat_cmds=\"$reload_cmds\" + else + # All subsequent reloadable object files will link in + # the last one created. + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" + fi + last_robj=$output_objdir/$output_la-$k.$objext + func_arith $k + 1 + k=$func_arith_result + output=$output_objdir/$output_la-$k.$objext + objlist=" $obj" + func_len " $last_robj" + func_arith $len0 + $func_len_result + len=$func_arith_result + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + reload_objs="$objlist $last_robj" + eval concat_cmds=\"\$concat_cmds$reload_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + func_append delfiles " $output" + + else + output= + fi + + ${skipped_export-false} && { + func_verbose "generating symbol list for '$libname.la'" + export_symbols=$output_objdir/$libname.exp + $opt_dry_run || $RM $export_symbols + libobjs=$output + # Append the command to create the export file. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" + if test -n "$last_robj"; then + eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" + fi + } + + test -n "$save_libobjs" && + func_verbose "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. 
+ save_ifs=$IFS; IFS='~' + for cmd in $concat_cmds; do + IFS=$save_ifs + $opt_quiet || { + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + if test -n "$export_symbols_regex" && ${skipped_export-false}; then + func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + func_show_eval '$MV "${export_symbols}T" "$export_symbols"' + fi + fi + + ${skipped_export-false} && { + if test -n "$export_symbols" && test -n "$include_expsyms"; then + tmp_export_symbols=$export_symbols + test -n "$orig_export_symbols" && tmp_export_symbols=$orig_export_symbols + $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' + fi + + if test -n "$orig_export_symbols"; then + # The given exports_symbols file has to be filtered, so filter it. + func_verbose "filter symbol list for '$libname.la' to tag DATA exports" + # FIXME: $output_objdir/$libname.filter potentially contains lots of + # 's' commands, which not all seds can handle. GNU sed should be fine + # though. Also, the filter scales superlinearly with the number of + # global variables. join(1) would be nice here, but unfortunately + # isn't a blessed tool. + $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter + func_append delfiles " $export_symbols $output_objdir/$libname.filter" + export_symbols=$output_objdir/$libname.def + $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols + fi + } + + libobjs=$output + # Restore the value of output. 
+ output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + test "X$libobjs" = "X " && libobjs= + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test yes = "$module" && test -n "$module_cmds"; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + fi + + if test -n "$delfiles"; then + # Append the command to remove temporary files to $cmds. + eval cmds=\"\$cmds~\$RM $delfiles\" + fi + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append libobjs " $func_extract_archives_result" + test "X$libobjs" = "X " && libobjs= + fi + + save_ifs=$IFS; IFS='~' + for cmd in $cmds; do + IFS=$sp$nl + eval cmd=\"$cmd\" + IFS=$save_ifs + $opt_quiet || { + func_quote_arg expand,pretty "$cmd" + eval "func_echo $func_quote_arg_result" + } + $opt_dry_run || eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + ( cd "$output_objdir" && \ + $RM "${realname}T" && \ + $MV "${realname}U" "$realname" ) + fi + + exit $lt_exit + } + done + IFS=$save_ifs + + # Restore the uninstalled library and exit + if test relink = "$opt_mode"; then + $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? 
+ + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + func_show_eval '${RM}r "$gentop"' + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test yes = "$module" || test yes = "$export_dynamic"; then + # On all known operating systems, these are identical. + dlname=$soname + fi + fi + ;; + + obj) + if test -n "$dlfiles$dlprefiles" || test no != "$dlself"; then + func_warning "'-dlopen' is ignored for objects" + fi + + case " $deplibs" in + *\ -l* | *\ -L*) + func_warning "'-l' and '-L' are ignored for objects" ;; + esac + + test -n "$rpath" && \ + func_warning "'-rpath' is ignored for objects" + + test -n "$xrpath" && \ + func_warning "'-R' is ignored for objects" + + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for objects" + + test -n "$release" && \ + func_warning "'-release' is ignored for objects" + + case $output in + *.lo) + test -n "$objs$old_deplibs" && \ + func_fatal_error "cannot build library object '$output' from non-libtool objects" + + libobj=$output + func_lo2o "$libobj" + obj=$func_lo2o_result + ;; + *) + libobj= + obj=$output + ;; + esac + + # Delete the old objects. + $opt_dry_run || $RM $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # if reload_cmds runs $LD directly, get rid of -Wl from + # whole_archive_flag_spec and hope we can get by with turning comma + # into space. 
+ case $reload_cmds in + *\$LD[\ \$]*) wl= ;; + esac + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" + test -n "$wl" || tmp_whole_archive_flags=`$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` + reload_conv_objs=$reload_objs\ $tmp_whole_archive_flags + else + gentop=$output_objdir/${obj}x + func_append generated " $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # If we're not building shared, we need to use non_pic_objs + test yes = "$build_libtool_libs" || libobjs=$non_pic_objects + + # Create the old-style object. + reload_objs=$objs$old_deplibs' '`$ECHO "$libobjs" | $SP2NL | $SED "/\.$libext$/d; /\.lib$/d; $lo2o" | $NL2SP`' '$reload_conv_objs + + output=$obj + func_execute_cmds "$reload_cmds" 'exit $?' + + # Exit if we aren't doing a library object file. + if test -z "$libobj"; then + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + fi + + test yes = "$build_libtool_libs" || { + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + # Create an invalid libtool object if no PIC, so that we don't + # accidentally link it into a program. + # $show "echo timestamp > $libobj" + # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? + exit $EXIT_SUCCESS + } + + if test -n "$pic_flag" || test default != "$pic_mode"; then + # Only do commands if we really have different PIC objects. + reload_objs="$libobjs $reload_conv_objs" + output=$libobj + func_execute_cmds "$reload_cmds" 'exit $?' 
+ fi + + if test -n "$gentop"; then + func_show_eval '${RM}r "$gentop"' + fi + + exit $EXIT_SUCCESS + ;; + + prog) + case $host in + *cygwin*) func_stripname '' '.exe' "$output" + output=$func_stripname_result.exe;; + esac + test -n "$vinfo" && \ + func_warning "'-version-info' is ignored for programs" + + test -n "$release" && \ + func_warning "'-release' is ignored for programs" + + $preload \ + && test unknown,unknown,unknown = "$dlopen_support,$dlopen_self,$dlopen_self_static" \ + && func_warning "'LT_INIT([dlopen])' not used. Assuming no dlopen support." + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` + ;; + esac + + case $host in + *-*-darwin*) + # Don't allow lazy linking, it breaks C++ global constructors + # But is supposedly fixed on 10.4 or later (yay!). 
+ if test CXX = "$tagname"; then + case ${MACOSX_DEPLOYMENT_TARGET-10.0} in + 10.[0123]) + func_append compile_command " $wl-bind_at_load" + func_append finalize_command " $wl-bind_at_load" + ;; + esac + fi + # Time to change all our "foo.ltframework" stuff back to "-framework foo" + compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` + ;; + esac + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $compile_deplibs " in + *" -L$path/$objdir "*) + func_append new_libs " -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $compile_deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) func_append new_libs " $deplib" ;; + esac + ;; + *) func_append new_libs " $deplib" ;; + esac + done + compile_deplibs=$new_libs + + + func_append compile_command " $compile_deplibs" + func_append finalize_command " $finalize_deplibs" + + if test -n "$rpath$xrpath"; then + # If the user specified any rpath flags, then add them. + for libdir in $rpath $xrpath; do + # This is the magic to use -rpath. + case "$finalize_rpath " in + *" $libdir "*) ;; + *) func_append finalize_rpath " $libdir" ;; + esac + done + fi + + # Now hardcode the library paths + rpath= + hardcode_libdirs= + for libdir in $compile_rpath $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. 
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) func_append perm_rpath " $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) + testbindir=`$ECHO "$libdir" | $SED -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + ::) dllsearchpath=$libdir;; + *) func_append dllsearchpath ":$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + ::) dllsearchpath=$testbindir;; + *) func_append dllsearchpath ":$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath=$rpath + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs=$libdir + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + func_append rpath " $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) func_append finalize_perm_rpath " $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. 
+ if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir=$hardcode_libdirs + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath=$rpath + + if test -n "$libobjs" && test yes = "$build_old_libs"; then + # Transform all the library objects into standard objects. + compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` + fi + + func_generate_dlsyms "$outputname" "@PROGRAM@" false + + # template prelinking step + if test -n "$prelink_cmds"; then + func_execute_cmds "$prelink_cmds" 'exit $?' + fi + + wrappers_required=: + case $host in + *cegcc* | *mingw32ce*) + # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. + wrappers_required=false + ;; + *cygwin* | *mingw* ) + test yes = "$build_libtool_libs" || wrappers_required=false + ;; + *) + if test no = "$need_relink" || test yes != "$build_libtool_libs"; then + wrappers_required=false + fi + ;; + esac + $wrappers_required || { + # Replace the output file specification. + compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + link_command=$compile_command$compile_rpath + + # We have no uninstalled library dependencies, so finalize right now. + exit_status=0 + func_show_eval "$link_command" 'exit_status=$?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Delete the generated files. 
+ if test -f "$output_objdir/${outputname}S.$objext"; then + func_show_eval '$RM "$output_objdir/${outputname}S.$objext"' + fi + + exit $exit_status + } + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + func_append rpath "$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + func_append rpath "$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test yes = "$no_install"; then + # We don't need to create a wrapper script. + link_command=$compile_var$compile_command$compile_rpath + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $opt_dry_run || $RM $output + # Link the executable and exit + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' 
+ fi + + exit $EXIT_SUCCESS + fi + + case $hardcode_action,$fast_install in + relink,*) + # Fast installation is not supported + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + + func_warning "this platform does not like uninstalled shared libraries" + func_warning "'$output' will be relinked during installation" + ;; + *,yes) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` + ;; + *,no) + link_command=$compile_var$compile_command$compile_rpath + relink_command=$finalize_var$finalize_command$finalize_rpath + ;; + *,needless) + link_command=$finalize_var$compile_command$finalize_rpath + relink_command= + ;; + esac + + # Replace the output file specification. + link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname + + func_show_eval "$link_command" 'exit $?' + + if test -n "$postlink_cmds"; then + func_to_tool_file "$output_objdir/$outputname" + postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` + func_execute_cmds "$postlink_cmds" 'exit $?' + fi + + # Now create the wrapper script. + func_verbose "creating $output" + + # Quote the relink command for shipping. 
+ if test -n "$relink_command"; then + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_arg pretty "$var_value" + relink_command="$var=$func_quote_arg_result; export $var; $relink_command" + fi + done + func_quote eval cd "`pwd`" + func_quote_arg pretty,unquoted "($func_quote_result; $relink_command)" + relink_command=$func_quote_arg_unquoted_result + fi + + # Only actually do things if not in dry run mode. + $opt_dry_run || { + # win32 will think the script is a binary if it has + # a .exe suffix, so we strip it off here. + case $output in + *.exe) func_stripname '' '.exe' "$output" + output=$func_stripname_result ;; + esac + # test for cygwin because mv fails w/o .exe extensions + case $host in + *cygwin*) + exeext=.exe + func_stripname '' '.exe' "$outputname" + outputname=$func_stripname_result ;; + *) exeext= ;; + esac + case $host in + *cygwin* | *mingw* ) + func_dirname_and_basename "$output" "" "." + output_name=$func_basename_result + output_path=$func_dirname_result + cwrappersource=$output_path/$objdir/lt-$output_name.c + cwrapper=$output_path/$output_name.exe + $RM $cwrappersource $cwrapper + trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 + + func_emit_cwrapperexe_src > $cwrappersource + + # The wrapper executable is built using the $host compiler, + # because it contains $host paths and files. If cross- + # compiling, it, like the target executable, must be + # executed on the $host or under an emulation environment. 
+ $opt_dry_run || { + $LTCC $LTCFLAGS -o $cwrapper $cwrappersource + $STRIP $cwrapper + } + + # Now, create the wrapper script for func_source use: + func_ltwrapper_scriptname $cwrapper + $RM $func_ltwrapper_scriptname_result + trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 + $opt_dry_run || { + # note: this script will not be executed, so do not chmod. + if test "x$build" = "x$host"; then + $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result + else + func_emit_wrapper no > $func_ltwrapper_scriptname_result + fi + } + ;; + * ) + $RM $output + trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 + + func_emit_wrapper no > $output + chmod +x $output + ;; + esac + } + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + case $build_libtool_libs in + convenience) + oldobjs="$libobjs_save $symfileobj" + addlibs=$convenience + build_libtool_libs=no + ;; + module) + oldobjs=$libobjs_save + addlibs=$old_convenience + build_libtool_libs=no + ;; + *) + oldobjs="$old_deplibs $non_pic_objects" + $preload && test -f "$symfileobj" \ + && func_append oldobjs " $symfileobj" + addlibs=$old_convenience + ;; + esac + + if test -n "$addlibs"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $addlibs + func_append oldobjs " $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test yes = "$build_libtool_libs"; then + cmds=$old_archive_from_new_cmds + else + + # Add any objects from preloaded convenience libraries + if test -n "$dlprefiles"; then + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + + func_extract_archives $gentop $dlprefiles + func_append oldobjs " $func_extract_archives_result" + fi + + # POSIX demands no paths to be encoded in archives. 
We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + func_basename "$obj" + $ECHO "$func_basename_result" + done | sort | sort -uc >/dev/null 2>&1); then + : + else + echo "copying selected object files to avoid basename conflicts..." + gentop=$output_objdir/${outputname}x + func_append generated " $gentop" + func_mkdir_p "$gentop" + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + func_basename "$obj" + objbase=$func_basename_result + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + func_arith $counter + 1 + counter=$func_arith_result + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! -f "$gentop/$newobj"; then break; fi ;; + esac + done + func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + func_append oldobjs " $gentop/$newobj" + ;; + *) func_append oldobjs " $obj" ;; + esac + done + fi + func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 + tool_oldlib=$func_to_tool_file_result + eval cmds=\"$old_archive_cmds\" + + func_len " $cmds" + len=$func_len_result + if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + elif test -n "$archiver_list_spec"; then + func_verbose "using command file archive linking..." 
+ for obj in $oldobjs + do + func_to_tool_file "$obj" + $ECHO "$func_to_tool_file_result" + done > $output_objdir/$libname.libcmd + func_to_tool_file "$output_objdir/$libname.libcmd" + oldobjs=" $archiver_list_spec$func_to_tool_file_result" + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + func_verbose "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + oldobjs= + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + eval test_cmds=\"$old_archive_cmds\" + func_len " $test_cmds" + len0=$func_len_result + len=$len0 + for obj in $save_oldobjs + do + func_len " $obj" + func_arith $len + $func_len_result + len=$func_arith_result + func_append objlist " $obj" + if test "$len" -lt "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj"; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\$concat_cmds$old_archive_cmds\" + objlist= + len=$len0 + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test -z "$oldobjs"; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + func_execute_cmds "$cmds" 'exit $?' + done + + test -n "$generated" && \ + func_show_eval "${RM}r$generated" + + # Now create the libtool archive. 
+ case $output in + *.la) + old_library= + test yes = "$build_old_libs" && old_library=$libname.$libext + func_verbose "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + func_quote_arg pretty,unquoted "$var_value" + relink_command="$var=$func_quote_arg_unquoted_result; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + func_quote eval cd "`pwd`" + relink_command="($func_quote_result; $SHELL \"$progpath\" $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + func_quote_arg pretty,unquoted "$relink_command" + relink_command=$func_quote_arg_unquoted_result + if test yes = "$hardcode_automatic"; then + relink_command= + fi + + # Only create the output if not a dry run. 
+ $opt_dry_run || { + for installed in no yes; do + if test yes = "$installed"; then + if test -z "$install_libdir"; then + break + fi + output=$output_objdir/${outputname}i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + func_basename "$deplib" + name=$func_basename_result + func_resolve_sysroot "$deplib" + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` + test -z "$libdir" && \ + func_fatal_error "'$deplib' is not a valid libtool archive" + func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" + ;; + -L*) + func_stripname -L '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -L$func_replace_sysroot_result" + ;; + -R*) + func_stripname -R '' "$deplib" + func_replace_sysroot "$func_stripname_result" + func_append newdependency_libs " -R$func_replace_sysroot_result" + ;; + *) func_append newdependency_libs " $deplib" ;; + esac + done + dependency_libs=$newdependency_libs + newdlfiles= + + for lib in $dlfiles; do + case $lib in + *.la) + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" + ;; + *) func_append newdlfiles " $lib" ;; + esac + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + *.la) + # Only pass preopened files to the pseudo-archive (for + # eventual linking with the app. 
that links it) if we + # didn't already link the preopened objects directly into + # the library: + func_basename "$lib" + name=$func_basename_result + eval libdir=`$SED -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + test -z "$libdir" && \ + func_fatal_error "'$lib' is not a valid libtool archive" + func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" + ;; + esac + done + dlprefiles=$newdlprefiles + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlfiles " $abs" + done + dlfiles=$newdlfiles + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs=$lib ;; + *) abs=`pwd`"/$lib" ;; + esac + func_append newdlprefiles " $abs" + done + dlprefiles=$newdlprefiles + fi + $RM $output + # place dlname in correct position for cygwin + # In fact, it would be nice if we could use this code for all target + # systems that can't hard-code library paths into their executables + # and that have no shared library path variable independent of PATH, + # but it turns out we can't easily determine that from inspecting + # libtool variables, so we have to hard-code the OSs to which it + # applies here; at the moment, that means platforms that use the PE + # object format with DLL files. See the long comment at the top of + # tests/bindir.at for full details. + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) + # If a -bindir argument was supplied, place the dll there. + if test -n "$bindir"; then + func_relative_path "$install_libdir" "$bindir" + tdlname=$func_relative_path_result/$dlname + else + # Otherwise fall back on heuristic. + tdlname=../bin/$dlname + fi + ;; + esac + $ECHO > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM (GNU $PACKAGE) $VERSION +# +# Please DO NOT delete this file! 
+# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Linker flags that cannot go in dependency_libs. +inherited_linker_flags='$new_inherited_linker_flags' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Names of additional weak libraries provided by this library +weak_library_names='$weak_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test no,yes = "$installed,$need_relink"; then + $ECHO >> $output "\ +relink_command=\"$relink_command\"" + fi + done + } + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' + ;; + esac + exit $EXIT_SUCCESS +} + +if test link = "$opt_mode" || test relink = "$opt_mode"; then + func_mode_link ${1+"$@"} +fi + + +# func_mode_uninstall arg... +func_mode_uninstall () +{ + $debug_cmd + + RM=$nonopt + files= + rmforce=false + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic=$magic + + for arg + do + case $arg in + -f) func_append RM " $arg"; rmforce=: ;; + -*) func_append RM " $arg" ;; + *) func_append files " $arg" ;; + esac + done + + test -z "$RM" && \ + func_fatal_help "you must specify an RM program" + + rmdirs= + + for file in $files; do + func_dirname "$file" "" "." 
+ dir=$func_dirname_result + if test . = "$dir"; then + odir=$objdir + else + odir=$dir/$objdir + fi + func_basename "$file" + name=$func_basename_result + test uninstall = "$opt_mode" && odir=$dir + + # Remember odir for removal later, being careful to avoid duplicates + if test clean = "$opt_mode"; then + case " $rmdirs " in + *" $odir "*) ;; + *) func_append rmdirs " $odir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if { test -L "$file"; } >/dev/null 2>&1 || + { test -h "$file"; } >/dev/null 2>&1 || + test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif $rmforce; then + continue + fi + + rmfiles=$file + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if func_lalib_p "$file"; then + func_source $dir/$name + + # Delete the libtool libraries and symlinks. + for n in $library_names; do + func_append rmfiles " $odir/$n" + done + test -n "$old_library" && func_append rmfiles " $odir/$old_library" + + case $opt_mode in + clean) + case " $library_names " in + *" $dlname "*) ;; + *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; + esac + test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + func_execute_cmds "$postuninstall_cmds" '$rmforce || exit_status=1' + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + func_execute_cmds "$old_postuninstall_cmds" '$rmforce || exit_status=1' + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if func_lalib_p "$file"; then + + # Read the .lo file + func_source $dir/$name + + # Add PIC object to the list of files to remove. 
+ if test -n "$pic_object" && test none != "$pic_object"; then + func_append rmfiles " $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" && test none != "$non_pic_object"; then + func_append rmfiles " $dir/$non_pic_object" + fi + fi + ;; + + *) + if test clean = "$opt_mode"; then + noexename=$name + case $file in + *.exe) + func_stripname '' '.exe' "$file" + file=$func_stripname_result + func_stripname '' '.exe' "$name" + noexename=$func_stripname_result + # $file with .exe has already been added to rmfiles, + # add $file without .exe + func_append rmfiles " $file" + ;; + esac + # Do a test to see if this is a libtool program. + if func_ltwrapper_p "$file"; then + if func_ltwrapper_executable_p "$file"; then + func_ltwrapper_scriptname "$file" + relink_command= + func_source $func_ltwrapper_scriptname_result + func_append rmfiles " $func_ltwrapper_scriptname_result" + else + relink_command= + func_source $dir/$noexename + fi + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + func_append rmfiles " $odir/$name $odir/${name}S.$objext" + if test yes = "$fast_install" && test -n "$relink_command"; then + func_append rmfiles " $odir/lt-$name" + fi + if test "X$noexename" != "X$name"; then + func_append rmfiles " $odir/lt-$noexename.c" + fi + fi + fi + ;; + esac + func_show_eval "$RM $rmfiles" 'exit_status=1' + done + + # Try to remove the $objdir's in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + func_show_eval "rmdir $dir >/dev/null 2>&1" + fi + done + + exit $exit_status +} + +if test uninstall = "$opt_mode" || test clean = "$opt_mode"; then + func_mode_uninstall ${1+"$@"} +fi + +test -z "$opt_mode" && { + help=$generic_help + func_fatal_help "you must specify a MODE" +} + +test -z "$exec_cmd" && \ + func_fatal_help "invalid operation mode '$opt_mode'" + +if test -n "$exec_cmd"; then + 
eval exec "$exec_cmd" + exit $EXIT_FAILURE +fi + +exit $exit_status + + +# The TAGs below are defined such that we never get into a situation +# where we disable both kinds of libraries. Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. +# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +build_libtool_libs=no +build_old_libs=yes +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/3rd/pcre2/m4/ax_pthread.m4 b/3rd/pcre2/m4/ax_pthread.m4 new file mode 100644 index 00000000..d90de34d --- /dev/null +++ b/3rd/pcre2/m4/ax_pthread.m4 @@ -0,0 +1,309 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_pthread.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro figures out how to build C programs using POSIX threads. It +# sets the PTHREAD_LIBS output variable to the threads library and linker +# flags, and the PTHREAD_CFLAGS output variable to any special C compiler +# flags that are needed. (The user can also force certain compiler +# flags/libs to be tested by setting these environment variables.) 
+# +# Also sets PTHREAD_CC to any special C compiler that is needed for +# multi-threaded programs (defaults to the value of CC otherwise). (This +# is necessary on AIX to use the special cc_r compiler alias.) +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. e.g. you should link with +# $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS +# +# If you are only building threads programs, you may wish to use these +# variables in your default LIBS, CFLAGS, and CC: +# +# LIBS="$PTHREAD_LIBS $LIBS" +# CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +# CC="$PTHREAD_CC" +# +# In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant +# has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name +# (e.g. PTHREAD_CREATE_UNDETACHED on AIX). +# +# Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the +# PTHREAD_PRIO_INHERIT symbol is defined when compiling with +# PTHREAD_CFLAGS. +# +# ACTION-IF-FOUND is a list of shell commands to run if a threads library +# is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it +# is not found. If ACTION-IF-FOUND is not specified, the default action +# will define HAVE_PTHREAD. +# +# Please let the authors know if this macro fails on any platform, or if +# you have any other suggestions or comments. This macro was based on work +# by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help +# from M. Frigo), as well as ac_pthread and hb_pthread macros posted by +# Alejandro Forero Cuervo to the autoconf macro repository. We are also +# grateful for the helpful feedback of numerous users. +# +# Updated for Autoconf 2.68 by Daniel Richard G. +# +# LICENSE +# +# Copyright (c) 2008 Steven G. Johnson +# Copyright (c) 2011 Daniel Richard G. 
+# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Archive. When you make and distribute a +# modified version of the Autoconf Macro, you may extend this special +# exception to the GPL to apply to your modified version as well. + +#serial 18 + +AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) +AC_DEFUN([AX_PTHREAD], [ +AC_REQUIRE([AC_CANONICAL_HOST]) +AC_LANG_PUSH([C]) +ax_pthread_ok=no + +# We used to check for pthread.h first, but this fails if pthread.h +# requires special compiler flags (e.g. on Tru64 or Sequent). +# It gets checked for in the link test anyway.
+ +# First of all, check if the user has set any of the PTHREAD_LIBS, +# etcetera environment variables, and if threads linking works using +# them: +if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) + AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes) + AC_MSG_RESULT($ax_pthread_ok) + if test x"$ax_pthread_ok" = xno; then + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" + fi + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" +fi + +# We must check for the threads library under a number of different +# names; the ordering is very important because some systems +# (e.g. DEC) have both -lpthread and -lpthreads, where one of the +# libraries is broken (non-POSIX). + +# Create a list of thread flags to try. Items starting with a "-" are +# C compiler flags, and other items are library names, except for "none" +# which indicates that we try without any flags at all, and "pthread-config" +# which is a program returning the flags for the Pth emulation library. + +ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" + +# The ordering *is* (sometimes) important. 
Some notes on the +# individual items follow: + +# pthreads: AIX (must check this before -lpthread) +# none: in case threads are in libc; should be tried before -Kthread and +# other compiler flags to prevent continual compiler warnings +# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) +# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) +# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) +# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) +# -pthreads: Solaris/gcc +# -mthreads: Mingw32/gcc, Lynx/gcc +# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it +# doesn't hurt to check since this sometimes defines pthreads too; +# also defines -D_REENTRANT) +# ... -mt is also the pthreads flag for HP/aCC +# pthread: Linux, etcetera +# --thread-safe: KAI C++ +# pthread-config: use pthread-config program (for GNU Pth library) + +case ${host_os} in + solaris*) + + # On Solaris (at least, for some versions), libc contains stubbed + # (non-functional) versions of the pthreads routines, so link-based + # tests will erroneously succeed. (We need to link with -pthreads/-mt/ + # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather + # a function called by this macro, so we could check for that, but + # who knows whether they'll stub that too in a future libc.) 
So, + # we'll just look for -pthreads and -lpthread first: + + ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags" + ;; + + darwin*) + ax_pthread_flags="-pthread $ax_pthread_flags" + ;; +esac + +if test x"$ax_pthread_ok" = xno; then +for flag in $ax_pthread_flags; do + + case $flag in + none) + AC_MSG_CHECKING([whether pthreads work without any flags]) + ;; + + -*) + AC_MSG_CHECKING([whether pthreads work with $flag]) + PTHREAD_CFLAGS="$flag" + ;; + + pthread-config) + AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no) + if test x"$ax_pthread_config" = xno; then continue; fi + PTHREAD_CFLAGS="`pthread-config --cflags`" + PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" + ;; + + *) + AC_MSG_CHECKING([for the pthreads library -l$flag]) + PTHREAD_LIBS="-l$flag" + ;; + esac + + save_LIBS="$LIBS" + save_CFLAGS="$CFLAGS" + LIBS="$PTHREAD_LIBS $LIBS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Check for various functions. We must include pthread.h, + # since some functions may be macros. (On the Sequent, we + # need a special flag -Kthread to make this header compile.) + # We check for pthread_join because it is in -lpthread on IRIX + # while pthread_create is in libc. We check for pthread_attr_init + # due to DEC craziness with -lpthreads. We check for + # pthread_cleanup_push because it is one of the few pthread + # functions on Solaris that doesn't have a non-functional libc stub. + # We try pthread_create on general principles. 
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h> + static void routine(void *a) { a = 0; } + static void *start_routine(void *a) { return a; }], + [pthread_t th; pthread_attr_t attr; + pthread_create(&th, 0, start_routine, 0); + pthread_join(th, 0); + pthread_attr_init(&attr); + pthread_cleanup_push(routine, 0); + pthread_cleanup_pop(0) /* ; */])], + [ax_pthread_ok=yes], + []) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + AC_MSG_RESULT($ax_pthread_ok) + if test "x$ax_pthread_ok" = xyes; then + break; + fi + + PTHREAD_LIBS="" + PTHREAD_CFLAGS="" +done +fi + +# Various other checks: +if test "x$ax_pthread_ok" = xyes; then + save_LIBS="$LIBS" + LIBS="$PTHREAD_LIBS $LIBS" + save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS $PTHREAD_CFLAGS" + + # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. + AC_MSG_CHECKING([for joinable pthread attribute]) + attr_name=unknown + for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], + [int attr = $attr; return attr /* ; */])], + [attr_name=$attr; break], + []) + done + AC_MSG_RESULT($attr_name) + if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then + AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, + [Define to necessary symbol if this constant + uses a non-standard name on your system.]) + fi + + AC_MSG_CHECKING([if more special flags are required for pthreads]) + flag=no + case ${host_os} in + aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";; + osf* | hpux*) flag="-D_REENTRANT";; + solaris*) + if test "$GCC" = "yes"; then + flag="-D_REENTRANT" + else + flag="-mt -D_REENTRANT" + fi + ;; + esac + AC_MSG_RESULT(${flag}) + if test "x$flag" != xno; then + PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" + fi + + AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], + ax_cv_PTHREAD_PRIO_INHERIT, [ + AC_LINK_IFELSE([ + AC_LANG_PROGRAM([[#include <pthread.h>]], [[int i = PTHREAD_PRIO_INHERIT;]])], + [ax_cv_PTHREAD_PRIO_INHERIT=yes], + [ax_cv_PTHREAD_PRIO_INHERIT=no]) + ]) + AS_IF([test
"x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"], + AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.])) + + LIBS="$save_LIBS" + CFLAGS="$save_CFLAGS" + + # More AIX lossage: must compile with xlc_r or cc_r + if test x"$GCC" != xyes; then + AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC}) + else + PTHREAD_CC=$CC + fi +else + PTHREAD_CC="$CC" +fi + +AC_SUBST(PTHREAD_LIBS) +AC_SUBST(PTHREAD_CFLAGS) +AC_SUBST(PTHREAD_CC) + +# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: +if test x"$ax_pthread_ok" = xyes; then + ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) + : +else + ax_pthread_ok=no + $2 +fi +AC_LANG_POP +])dnl AX_PTHREAD diff --git a/3rd/pcre2/m4/libtool.m4 b/3rd/pcre2/m4/libtool.m4 new file mode 100644 index 00000000..e7b68334 --- /dev/null +++ b/3rd/pcre2/m4/libtool.m4 @@ -0,0 +1,8427 @@ +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +# +# Copyright (C) 1996-2001, 2003-2019, 2021-2022 Free Software +# Foundation, Inc. +# Written by Gordon Matzigkeit, 1996 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +m4_define([_LT_COPYING], [dnl +# Copyright (C) 2014 Free Software Foundation, Inc. +# This is free software; see the source for copying conditions. There is NO +# warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +# GNU Libtool is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of of the License, or +# (at your option) any later version. 
+# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program or library that is built +# using GNU Libtool, you may include this file under the same +# distribution terms that you use for the rest of that program. +# +# GNU Libtool is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +]) + +# serial 59 LT_INIT + + +# LT_PREREQ(VERSION) +# ------------------ +# Complain and exit if this libtool version is less than VERSION. +m4_defun([LT_PREREQ], +[m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, + [m4_default([$3], + [m4_fatal([Libtool version $1 or higher is required], + 63)])], + [$2])]) + + +# _LT_CHECK_BUILDDIR +# ------------------ +# Complain if the absolute build directory name contains unusual characters +m4_defun([_LT_CHECK_BUILDDIR], +[case `pwd` in + *\ * | *\ *) + AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; +esac +]) + + +# LT_INIT([OPTIONS]) +# ------------------ +AC_DEFUN([LT_INIT], +[AC_PREREQ([2.62])dnl We use AC_PATH_PROGS_FEATURE_CHECK +AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl +AC_BEFORE([$0], [LT_LANG])dnl +AC_BEFORE([$0], [LT_OUTPUT])dnl +AC_BEFORE([$0], [LTDL_INIT])dnl +m4_require([_LT_CHECK_BUILDDIR])dnl + +dnl Autoconf doesn't catch unexpanded LT_ macros by default: +m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl +m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl +dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 +dnl unless we require an AC_DEFUNed macro: +AC_REQUIRE([LTOPTIONS_VERSION])dnl +AC_REQUIRE([LTSUGAR_VERSION])dnl +AC_REQUIRE([LTVERSION_VERSION])dnl +AC_REQUIRE([LTOBSOLETE_VERSION])dnl
+m4_require([_LT_PROG_LTMAIN])dnl + +_LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) + +dnl Parse OPTIONS +_LT_SET_OPTIONS([$0], [$1]) + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS=$ltmain + +# Always use our own libtool. +LIBTOOL='$(SHELL) $(top_builddir)/libtool' +AC_SUBST(LIBTOOL)dnl + +_LT_SETUP + +# Only expand once: +m4_define([LT_INIT]) +])# LT_INIT + +# Old names: +AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) +AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PROG_LIBTOOL], []) +dnl AC_DEFUN([AM_PROG_LIBTOOL], []) + + +# _LT_PREPARE_CC_BASENAME +# ----------------------- +m4_defun([_LT_PREPARE_CC_BASENAME], [ +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +func_cc_basename () +{ + for cc_temp in @S|@*""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac + done + func_cc_basename_result=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` +} +])# _LT_PREPARE_CC_BASENAME + + +# _LT_CC_BASENAME(CC) +# ------------------- +# It would be clearer to call AC_REQUIREs from _LT_PREPARE_CC_BASENAME, +# but that macro is also expanded into generated libtool script, which +# arranges for $SED and $ECHO to be set by different means. +m4_defun([_LT_CC_BASENAME], +[m4_require([_LT_PREPARE_CC_BASENAME])dnl +AC_REQUIRE([_LT_DECL_SED])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl +func_cc_basename $1 +cc_basename=$func_cc_basename_result +]) + + +# _LT_FILEUTILS_DEFAULTS +# ---------------------- +# It is okay to use these file commands and assume they have been set +# sensibly after 'm4_require([_LT_FILEUTILS_DEFAULTS])'. 
+m4_defun([_LT_FILEUTILS_DEFAULTS], +[: ${CP="cp -f"} +: ${MV="mv -f"} +: ${RM="rm -f"} +])# _LT_FILEUTILS_DEFAULTS + + +# _LT_SETUP +# --------- +m4_defun([_LT_SETUP], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl + +_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl +dnl +_LT_DECL([], [host_alias], [0], [The host system])dnl +_LT_DECL([], [host], [0])dnl +_LT_DECL([], [host_os], [0])dnl +dnl +_LT_DECL([], [build_alias], [0], [The build system])dnl +_LT_DECL([], [build], [0])dnl +_LT_DECL([], [build_os], [0])dnl +dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +dnl +AC_REQUIRE([AC_PROG_LN_S])dnl +test -z "$LN_S" && LN_S="ln -s" +_LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl +dnl +AC_REQUIRE([LT_CMD_MAX_LEN])dnl +_LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl +_LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl +dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl +m4_require([_LT_CMD_RELOAD])dnl +m4_require([_LT_DECL_FILECMD])dnl +m4_require([_LT_CHECK_MAGIC_METHOD])dnl +m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl +m4_require([_LT_CMD_OLD_ARCHIVE])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_WITH_SYSROOT])dnl +m4_require([_LT_CMD_TRUNCATE])dnl + +_LT_CONFIG_LIBTOOL_INIT([ +# See if we are running on zsh, and set the options that allow our +# commands through without removal of \ escapes INIT. +if test -n "\${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi +]) +if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST +fi + +_LT_CHECK_OBJDIR + +m4_require([_LT_TAG_COMPILER])dnl + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. 
For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Global variables: +ofile=libtool +can_build_shared=yes + +# All known linkers require a '.a' archive for static linking (except MSVC and +# ICC, which need '.lib'). +libext=a + +with_gnu_ld=$lt_cv_prog_gnu_ld + +old_CC=$CC +old_CFLAGS=$CFLAGS + +# Set sane defaults for various variables +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$LD" && LD=ld +test -z "$ac_objext" && ac_objext=o + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +test -z "$MAGIC_CMD" && MAGIC_CMD=file +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + _LT_PATH_MAGIC + fi + ;; +esac + +# Use C for the default configuration in the libtool script +LT_SUPPORTED_TAG([CC]) +_LT_LANG_C_CONFIG +_LT_LANG_DEFAULT_CONFIG +_LT_CONFIG_COMMANDS +])# _LT_SETUP + + +# _LT_PREPARE_SED_QUOTE_VARS +# -------------------------- +# Define a few sed substitution that help us do robust quoting. +m4_defun([_LT_PREPARE_SED_QUOTE_VARS], +[# Backslashify metacharacters that are still active within +# double-quoted strings. +sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' + +# Same as above, but do not quote variable references. +double_quote_subst='s/\([["`\\]]\)/\\\1/g' + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to delay expansion of an escaped single quote. 
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' +]) + +# _LT_PROG_LTMAIN +# --------------- +# Note that this code is called both from 'configure', and 'config.status' +# now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, +# 'config.status' has no value for ac_aux_dir unless we are using Automake, +# so we pass a copy along to make sure it has a sensible value anyway. +m4_defun([_LT_PROG_LTMAIN], +[m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl +_LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) +ltmain=$ac_aux_dir/ltmain.sh +])# _LT_PROG_LTMAIN + + +## ------------------------------------- ## +## Accumulate code for creating libtool. ## +## ------------------------------------- ## + +# So that we can recreate a full libtool script including additional +# tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS +# in macros and then make a single call at the end using the 'libtool' +# label. + + +# _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) +# ---------------------------------------- +# Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL_INIT], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_INIT], + [$1 +])])]) + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_INIT]) + + +# _LT_CONFIG_LIBTOOL([COMMANDS]) +# ------------------------------ +# Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. +m4_define([_LT_CONFIG_LIBTOOL], +[m4_ifval([$1], + [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], + [$1 +])])]) + +# Initialize. 
+m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS]) + + +# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS]) +# ----------------------------------------------------- +m4_defun([_LT_CONFIG_SAVE_COMMANDS], +[_LT_CONFIG_LIBTOOL([$1]) +_LT_CONFIG_LIBTOOL_INIT([$2]) +]) + + +# _LT_FORMAT_COMMENT([COMMENT]) +# ----------------------------- +# Add leading comment marks to the start of each line, and a trailing +# full-stop to the whole comment if one is not present already. +m4_define([_LT_FORMAT_COMMENT], +[m4_ifval([$1], [ +m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])], + [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.]) +)]) + + + +## ------------------------ ## +## FIXME: Eliminate VARNAME ## +## ------------------------ ## + + +# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?]) +# ------------------------------------------------------------------- +# CONFIGNAME is the name given to the value in the libtool script. +# VARNAME is the (base) name used in the configure script. +# VALUE may be 0, 1 or 2 for a computed quote escaped value based on +# VARNAME. Any other value will be used directly. 
+m4_define([_LT_DECL], +[lt_if_append_uniq([lt_decl_varnames], [$2], [, ], + [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name], + [m4_ifval([$1], [$1], [$2])]) + lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3]) + m4_ifval([$4], + [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])]) + lt_dict_add_subkey([lt_decl_dict], [$2], + [tagged?], [m4_ifval([$5], [yes], [no])])]) +]) + + +# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION]) +# -------------------------------------------------------- +m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])]) + + +# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_tag_varnames], +[_lt_decl_filter([tagged?], [yes], $@)]) + + +# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..]) +# --------------------------------------------------------- +m4_define([_lt_decl_filter], +[m4_case([$#], + [0], [m4_fatal([$0: too few arguments: $#])], + [1], [m4_fatal([$0: too few arguments: $#: $1])], + [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)], + [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)], + [lt_dict_filter([lt_decl_dict], $@)])[]dnl +]) + + +# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...]) +# -------------------------------------------------- +m4_define([lt_decl_quote_varnames], +[_lt_decl_filter([value], [1], $@)]) + + +# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_dquote_varnames], +[_lt_decl_filter([value], [2], $@)]) + + +# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...]) +# --------------------------------------------------- +m4_define([lt_decl_varnames_tagged], +[m4_assert([$# <= 2])dnl +_$0(m4_quote(m4_default([$1], [[, ]])), + m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]), + m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))]) 
+m4_define([_lt_decl_varnames_tagged], +[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])]) + + +# lt_decl_all_varnames([SEPARATOR], [VARNAME1...]) +# ------------------------------------------------ +m4_define([lt_decl_all_varnames], +[_$0(m4_quote(m4_default([$1], [[, ]])), + m4_if([$2], [], + m4_quote(lt_decl_varnames), + m4_quote(m4_shift($@))))[]dnl +]) +m4_define([_lt_decl_all_varnames], +[lt_join($@, lt_decl_varnames_tagged([$1], + lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl +]) + + +# _LT_CONFIG_STATUS_DECLARE([VARNAME]) +# ------------------------------------ +# Quote a variable value, and forward it to 'config.status' so that its +# declaration there will have the same value as in 'configure'. VARNAME +# must have a single quote delimited value for this to work. +m4_define([_LT_CONFIG_STATUS_DECLARE], +[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`']) + + +# _LT_CONFIG_STATUS_DECLARATIONS +# ------------------------------ +# We delimit libtool config variables with single quotes, so when +# we write them to config.status, we have to be sure to quote all +# embedded single quotes properly. In configure, this macro expands +# each variable declared with _LT_DECL (and _LT_TAGDECL) into: +# +# <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`' +m4_defun([_LT_CONFIG_STATUS_DECLARATIONS], +[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames), + [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAGS +# ---------------- +# Output comment and list of tags supported by the script +m4_defun([_LT_LIBTOOL_TAGS], +[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl +available_tags='_LT_TAGS'dnl +]) + + +# _LT_LIBTOOL_DECLARE(VARNAME, [TAG]) +# ----------------------------------- +# Extract the dictionary values for VARNAME (optionally with TAG) and +# expand to a commented shell variable setting: +# +# # Some comment about what VAR is for.
+# visible_name=$lt_internal_name +m4_define([_LT_LIBTOOL_DECLARE], +[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], + [description])))[]dnl +m4_pushdef([_libtool_name], + m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl +m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])), + [0], [_libtool_name=[$]$1], + [1], [_libtool_name=$lt_[]$1], + [2], [_libtool_name=$lt_[]$1], + [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl +m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl +]) + + +# _LT_LIBTOOL_CONFIG_VARS +# ----------------------- +# Produce commented declarations of non-tagged libtool config variables +# suitable for insertion in the LIBTOOL CONFIG section of the 'libtool' +# script. Tagged libtool config variables (even for the LIBTOOL CONFIG +# section) are produced by _LT_LIBTOOL_TAG_VARS. +m4_defun([_LT_LIBTOOL_CONFIG_VARS], +[m4_foreach([_lt_var], + m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])]) + + +# _LT_LIBTOOL_TAG_VARS(TAG) +# ------------------------- +m4_define([_LT_LIBTOOL_TAG_VARS], +[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames), + [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])]) + + +# _LT_TAGVAR(VARNAME, [TAGNAME]) +# ------------------------------ +m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])]) + + +# _LT_CONFIG_COMMANDS +# ------------------- +# Send accumulated output to $CONFIG_STATUS. Thanks to the lists of +# variables for single and double quote escaping we saved from calls +# to _LT_DECL, we can put quote escaped variables declarations +# into 'config.status', and then the shell code to quote escape them in +# for loops in 'config.status'. Finally, any additional code accumulated +# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded. 
+m4_defun([_LT_CONFIG_COMMANDS], +[AC_PROVIDE_IFELSE([LT_OUTPUT], + dnl If the libtool generation code has been placed in $CONFIG_LT, + dnl instead of duplicating it all over again into config.status, + dnl then we will have config.status run $CONFIG_LT later, so it + dnl needs to know what name is stored there: + [AC_CONFIG_COMMANDS([libtool], + [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])], + dnl If the libtool generation code is destined for config.status, + dnl expand the accumulated commands and init code now: + [AC_CONFIG_COMMANDS([libtool], + [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])]) +])#_LT_CONFIG_COMMANDS + + +# Initialize. +m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT], +[ + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +sed_quote_subst='$sed_quote_subst' +double_quote_subst='$double_quote_subst' +delay_variable_subst='$delay_variable_subst' +_LT_CONFIG_STATUS_DECLARATIONS +LTCC='$LTCC' +LTCFLAGS='$LTCFLAGS' +compiler='$compiler_DEFAULT' + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +\$[]1 +_LTECHO_EOF' +} + +# Quote evaled strings. +for var in lt_decl_all_varnames([[ \ +]], lt_decl_quote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +# Double-quote double-evaled strings. 
+for var in lt_decl_all_varnames([[ \ +]], lt_decl_dquote_varnames); do + case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in + *[[\\\\\\\`\\"\\\$]]*) + eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\"" ## exclude from sc_prohibit_nested_quotes + ;; + *) + eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" + ;; + esac +done + +_LT_OUTPUT_LIBTOOL_INIT +]) + +# _LT_GENERATED_FILE_INIT(FILE, [COMMENT]) +# ------------------------------------ +# Generate a child script FILE with all initialization necessary to +# reuse the environment learned by the parent script, and make the +# file executable. If COMMENT is supplied, it is inserted after the +# '#!' sequence but before initialization text begins. After this +# macro, additional text can be appended to FILE to form the body of +# the child script. The macro ends with non-zero status if the +# file could not be fully written (such as if the disk is full). +m4_ifdef([AS_INIT_GENERATED], +[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])], +[m4_defun([_LT_GENERATED_FILE_INIT], +[m4_require([AS_PREPARE])]dnl +[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl +[lt_write_fail=0 +cat >$1 <<_ASEOF || lt_write_fail=1 +#! $SHELL +# Generated by $as_me. +$2 +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$1 <<\_ASEOF || lt_write_fail=1 +AS_SHELL_SANITIZE +_AS_PREPARE +exec AS_MESSAGE_FD>&1 +_ASEOF +test 0 = "$lt_write_fail" && chmod +x $1[]dnl +m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT + +# LT_OUTPUT +# --------- +# This macro allows early generation of the libtool script (before +# AC_OUTPUT is called), incase it is used in configure for compilation +# tests. 
+AC_DEFUN([LT_OUTPUT], +[: ${CONFIG_LT=./config.lt} +AC_MSG_NOTICE([creating $CONFIG_LT]) +_LT_GENERATED_FILE_INIT(["$CONFIG_LT"], +[# Run this file to recreate a libtool stub with the current configuration.]) + +cat >>"$CONFIG_LT" <<\_LTEOF +lt_cl_silent=false +exec AS_MESSAGE_LOG_FD>>config.log +{ + echo + AS_BOX([Running $as_me.]) +} >&AS_MESSAGE_LOG_FD + +lt_cl_help="\ +'$as_me' creates a local libtool stub from the current configuration, +for use in further configure time tests before the real libtool is +generated. + +Usage: $[0] [[OPTIONS]] + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + +Report bugs to <bug-libtool@gnu.org>." + +lt_cl_version="\ +m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl +m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION]) +configured by $[0], generated by m4_PACKAGE_STRING. + +Copyright (C) 2011 Free Software Foundation, Inc. +This config.lt script is free software; the Free Software Foundation +gives unlimited permision to copy, distribute and modify it."
+ +while test 0 != $[#] +do + case $[1] in + --version | --v* | -V ) + echo "$lt_cl_version"; exit 0 ;; + --help | --h* | -h ) + echo "$lt_cl_help"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --quiet | --q* | --silent | --s* | -q ) + lt_cl_silent=: ;; + + -*) AC_MSG_ERROR([unrecognized option: $[1] +Try '$[0] --help' for more information.]) ;; + + *) AC_MSG_ERROR([unrecognized argument: $[1] +Try '$[0] --help' for more information.]) ;; + esac + shift +done + +if $lt_cl_silent; then + exec AS_MESSAGE_FD>/dev/null +fi +_LTEOF + +cat >>"$CONFIG_LT" <<_LTEOF +_LT_OUTPUT_LIBTOOL_COMMANDS_INIT +_LTEOF + +cat >>"$CONFIG_LT" <<\_LTEOF +AC_MSG_NOTICE([creating $ofile]) +_LT_OUTPUT_LIBTOOL_COMMANDS +AS_EXIT(0) +_LTEOF +chmod +x "$CONFIG_LT" + +# configure is writing to config.log, but config.lt does its own redirection, +# appending to config.log, which fails on DOS, as config.log is still kept +# open by configure. Here we exec the FD to /dev/null, effectively closing +# config.log, so it can be properly (re)opened and appended to by config.lt. +lt_cl_success=: +test yes = "$silent" && + lt_config_lt_args="$lt_config_lt_args --quiet" +exec AS_MESSAGE_LOG_FD>/dev/null +$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false +exec AS_MESSAGE_LOG_FD>>config.log +$lt_cl_success || AS_EXIT(1) +])# LT_OUTPUT + + +# _LT_CONFIG(TAG) +# --------------- +# If TAG is the built-in tag, create an initial libtool script with a +# default configuration from the untagged config vars. Otherwise add code +# to config.status for appending the configuration named by TAG from the +# matching tagged config vars. +m4_defun([_LT_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_CONFIG_SAVE_COMMANDS([ + m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl + m4_if(_LT_TAG, [C], [ + # See if we are running on zsh, and set the options that allow our + # commands through without removal of \ escapes. 
+ if test -n "${ZSH_VERSION+set}"; then + setopt NO_GLOB_SUBST + fi + + cfgfile=${ofile}T + trap "$RM \"$cfgfile\"; exit 1" 1 2 15 + $RM "$cfgfile" + + cat <<_LT_EOF >> "$cfgfile" +#! $SHELL +# Generated automatically by $as_me ($PACKAGE) $VERSION +# NOTE: Changes made to this file will be lost: look at ltmain.sh. + +# Provide generalized library-building support services. +# Written by Gordon Matzigkeit, 1996 + +_LT_COPYING +_LT_LIBTOOL_TAGS + +# Configured defaults for sys_lib_dlsearch_path munging. +: \${LT_SYS_LIBRARY_PATH="$configure_time_lt_sys_library_path"} + +# ### BEGIN LIBTOOL CONFIG +_LT_LIBTOOL_CONFIG_VARS +_LT_LIBTOOL_TAG_VARS +# ### END LIBTOOL CONFIG + +_LT_EOF + + cat <<'_LT_EOF' >> "$cfgfile" + +# ### BEGIN FUNCTIONS SHARED WITH CONFIGURE + +_LT_PREPARE_MUNGE_PATH_LIST +_LT_PREPARE_CC_BASENAME + +# ### END FUNCTIONS SHARED WITH CONFIGURE + +_LT_EOF + + case $host_os in + aix3*) + cat <<\_LT_EOF >> "$cfgfile" +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test set != "${COLLECT_NAMES+set}"; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +_LT_EOF + ;; + esac + + _LT_PROG_LTMAIN + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + $SED '$q' "$ltmain" >> "$cfgfile" \ + || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +], +[cat <<_LT_EOF >> "$ofile" + +dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded +dnl in a comment (ie after a #). 
+# ### BEGIN LIBTOOL TAG CONFIG: $1 +_LT_LIBTOOL_TAG_VARS(_LT_TAG) +# ### END LIBTOOL TAG CONFIG: $1 +_LT_EOF +])dnl /m4_if +], +[m4_if([$1], [], [ + PACKAGE='$PACKAGE' + VERSION='$VERSION' + RM='$RM' + ofile='$ofile'], []) +])dnl /_LT_CONFIG_SAVE_COMMANDS +])# _LT_CONFIG + + +# LT_SUPPORTED_TAG(TAG) +# --------------------- +# Trace this macro to discover what tags are supported by the libtool +# --tag option, using: +# autoconf --trace 'LT_SUPPORTED_TAG:$1' +AC_DEFUN([LT_SUPPORTED_TAG], []) + + +# C support is built-in for now +m4_define([_LT_LANG_C_enabled], []) +m4_define([_LT_TAGS], []) + + +# LT_LANG(LANG) +# ------------- +# Enable libtool support for the given language if not already enabled. +AC_DEFUN([LT_LANG], +[AC_BEFORE([$0], [LT_OUTPUT])dnl +m4_case([$1], + [C], [_LT_LANG(C)], + [C++], [_LT_LANG(CXX)], + [Go], [_LT_LANG(GO)], + [Java], [_LT_LANG(GCJ)], + [Fortran 77], [_LT_LANG(F77)], + [Fortran], [_LT_LANG(FC)], + [Windows Resource], [_LT_LANG(RC)], + [m4_ifdef([_LT_LANG_]$1[_CONFIG], + [_LT_LANG($1)], + [m4_fatal([$0: unsupported language: "$1"])])])dnl +])# LT_LANG + + +# _LT_LANG(LANGNAME) +# ------------------ +m4_defun([_LT_LANG], +[m4_ifdef([_LT_LANG_]$1[_enabled], [], + [LT_SUPPORTED_TAG([$1])dnl + m4_append([_LT_TAGS], [$1 ])dnl + m4_define([_LT_LANG_]$1[_enabled], [])dnl + _LT_LANG_$1_CONFIG($1)])dnl +])# _LT_LANG + + +m4_ifndef([AC_PROG_GO], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_GO. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. 
# +############################################################ +m4_defun([AC_PROG_GO], +[AC_LANG_PUSH(Go)dnl +AC_ARG_VAR([GOC], [Go compiler command])dnl +AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl +_AC_ARG_VAR_LDFLAGS()dnl +AC_CHECK_TOOL(GOC, gccgo) +if test -z "$GOC"; then + if test -n "$ac_tool_prefix"; then + AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo]) + fi +fi +if test -z "$GOC"; then + AC_CHECK_PROG(GOC, gccgo, gccgo, false) +fi +])#m4_defun +])#m4_ifndef + + +# _LT_LANG_DEFAULT_CONFIG +# ----------------------- +m4_defun([_LT_LANG_DEFAULT_CONFIG], +[AC_PROVIDE_IFELSE([AC_PROG_CXX], + [LT_LANG(CXX)], + [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])]) + +AC_PROVIDE_IFELSE([AC_PROG_F77], + [LT_LANG(F77)], + [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])]) + +AC_PROVIDE_IFELSE([AC_PROG_FC], + [LT_LANG(FC)], + [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])]) + +dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal +dnl pulling things in needlessly. 
+AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [LT_LANG(GCJ)], + [AC_PROVIDE_IFELSE([LT_PROG_GCJ], + [LT_LANG(GCJ)], + [m4_ifdef([AC_PROG_GCJ], + [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([A][M_PROG_GCJ], + [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])]) + m4_ifdef([LT_PROG_GCJ], + [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])]) + +AC_PROVIDE_IFELSE([AC_PROG_GO], + [LT_LANG(GO)], + [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])]) + +AC_PROVIDE_IFELSE([LT_PROG_RC], + [LT_LANG(RC)], + [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])]) +])# _LT_LANG_DEFAULT_CONFIG + +# Obsolete macros: +AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)]) +AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)]) +AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)]) +AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)]) +AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_CXX], []) +dnl AC_DEFUN([AC_LIBTOOL_F77], []) +dnl AC_DEFUN([AC_LIBTOOL_FC], []) +dnl AC_DEFUN([AC_LIBTOOL_GCJ], []) +dnl AC_DEFUN([AC_LIBTOOL_RC], []) + + +# _LT_TAG_COMPILER +# ---------------- +m4_defun([_LT_TAG_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl +_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl +_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl +_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_TAG_COMPILER + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. 
+m4_defun([_LT_COMPILER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$RM conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +m4_defun([_LT_LINKER_BOILERPLATE], +[m4_require([_LT_DECL_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$RM -r conftest* +])# _LT_LINKER_BOILERPLATE + +# _LT_REQUIRED_DARWIN_CHECKS +# ------------------------- +m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CHECK_TOOL([LIPO], [lipo], [:]) + AC_CHECK_TOOL([OTOOL], [otool], [:]) + AC_CHECK_TOOL([OTOOL64], [otool64], [:]) + _LT_DECL([], [DSYMUTIL], [1], + [Tool to manipulate archived DWARF debug symbol files on Mac OS X]) + _LT_DECL([], [NMEDIT], [1], + [Tool to change global to local symbols on Mac OS X]) + _LT_DECL([], [LIPO], [1], + [Tool to manipulate fat objects and archives on Mac OS X]) + _LT_DECL([], [OTOOL], [1], + [ldd/readelf like tool for Mach-O binaries on Mac OS X]) + _LT_DECL([], [OTOOL64], [1], + [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4]) + + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "$LT_MULTI_MODULE"; then + # By default we will add the -single_module flag. You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. 
+ rm -rf libconftest.dylib* + echo "int foo(void){return 1;}" > conftest.c + echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ +-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib -Wl,-single_module conftest.c 2>conftest.err + _lt_result=$? + # If there is a non-empty error log, and "single_module" + # appears in it, assume the flag caused a linker warning + if test -s conftest.err && $GREP single_module conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + # Otherwise, if the output was created with a 0 exit code from + # the compiler, it worked. + elif test -f libconftest.dylib && test 0 = "$_lt_result"; then + lt_cv_apple_cc_single_mod=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -rf libconftest.dylib* + rm -f conftest.* + fi]) + + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS=$save_LDFLAGS + ]) + + AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load], + [lt_cv_ld_force_load=no + cat > conftest.c << _LT_EOF +int forced_loaded() { return 2;} +_LT_EOF + echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD + echo "$AR $AR_FLAGS libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD + $AR $AR_FLAGS libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD + echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD + $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD + cat > conftest.c << _LT_EOF +int main() { return 0;} +_LT_EOF + echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD + $LTCC $LTCFLAGS $LDFLAGS -o conftest 
conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err + _lt_result=$? + if test -s conftest.err && $GREP force_load conftest.err; then + cat conftest.err >&AS_MESSAGE_LOG_FD + elif test -f conftest && test 0 = "$_lt_result" && $GREP forced_load conftest >/dev/null 2>&1; then + lt_cv_ld_force_load=yes + else + cat conftest.err >&AS_MESSAGE_LOG_FD + fi + rm -f conftest.err libconftest.a conftest conftest.c + rm -rf conftest.dSYM + ]) + case $host_os in + rhapsody* | darwin1.[[012]]) + _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + darwin*) + case $MACOSX_DEPLOYMENT_TARGET,$host in + 10.[[012]],*|,*powerpc*-darwin[[5-8]]*) + _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;; + *) + _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test yes = "$lt_cv_apple_cc_single_mod"; then + _lt_dar_single_mod='$single_module' + fi + if test yes = "$lt_cv_ld_exported_symbols_list"; then + _lt_dar_export_syms=' $wl-exported_symbols_list,$output_objdir/$libname-symbols.expsym' + else + _lt_dar_export_syms='~$NMEDIT -s $output_objdir/$libname-symbols.expsym $lib' + fi + if test : != "$DSYMUTIL" && test no = "$lt_cv_ld_force_load"; then + _lt_dsymutil='~$DSYMUTIL $lib || :' + else + _lt_dsymutil= + fi + ;; + esac +]) + + +# _LT_DARWIN_LINKER_FEATURES([TAG]) +# --------------------------------- +# Checks for linker and compiler features on darwin +m4_defun([_LT_DARWIN_LINKER_FEATURES], +[ + m4_require([_LT_REQUIRED_DARWIN_CHECKS]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_automatic, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + if test yes = "$lt_cv_ld_force_load"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience $wl-force_load,$conv\"; done; func_echo_all 
\"$new_convenience\"`' + m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes], + [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes]) + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='' + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=$_lt_dar_allow_undefined + case $cc_basename in + ifort*|nagfor*) _lt_dar_can_shared=yes ;; + *) _lt_dar_can_shared=$GCC ;; + esac + if test yes = "$_lt_dar_can_shared"; then + output_verbose_link_cmd=func_echo_all + _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil" + _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil" + _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil" + m4_if([$1], [CXX], +[ if test yes != "$lt_cv_apple_cc_single_mod"; then + _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil" + _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname 
\$verstring$_lt_dar_export_syms$_lt_dsymutil" + fi +],[]) + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi +]) + +# _LT_SYS_MODULE_PATH_AIX([TAGNAME]) +# ---------------------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. +# Store the results from the different compilers for each TAGNAME. +# Allow to override them for all tags through lt_cv_aix_libpath. +m4_defun([_LT_SYS_MODULE_PATH_AIX], +[m4_require([_LT_DECL_SED])dnl +if test set = "${lt_cv_aix_libpath+set}"; then + aix_libpath=$lt_cv_aix_libpath +else + AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])], + [AC_LINK_IFELSE([AC_LANG_PROGRAM],[ + lt_aix_libpath_sed='[ + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\([^ ]*\) *$/\1/ + p + } + }]' + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + # Check for a 64-bit object if we didn't find anything. + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` + fi],[]) + if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then + _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=/usr/lib:/lib + fi + ]) + aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1]) +fi +])# _LT_SYS_MODULE_PATH_AIX + + +# _LT_SHELL_INIT(ARG) +# ------------------- +m4_define([_LT_SHELL_INIT], +[m4_divert_text([M4SH-INIT], [$1 +])])# _LT_SHELL_INIT + + + +# _LT_PROG_ECHO_BACKSLASH +# ----------------------- +# Find how we can fake an echo command that does not interpret backslash. 
+# In particular, with Autoconf 2.60 or later we add some code to the start +# of the generated configure script that will find a shell with a builtin +# printf (that we can use as an echo command). +m4_defun([_LT_PROG_ECHO_BACKSLASH], +[ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO +ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + +AC_MSG_CHECKING([how to print strings]) +# Test print first, because it will be a builtin if present. +if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ + test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='print -r --' +elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then + ECHO='printf %s\n' +else + # Use this function as a fallback that always works. + func_fallback_echo () + { + eval 'cat <<_LTECHO_EOF +$[]1 +_LTECHO_EOF' + } + ECHO='func_fallback_echo' +fi + +# func_echo_all arg... +# Invoke $ECHO with all args, space-separated. +func_echo_all () +{ + $ECHO "$*" +} + +case $ECHO in + printf*) AC_MSG_RESULT([printf]) ;; + print*) AC_MSG_RESULT([print -r]) ;; + *) AC_MSG_RESULT([cat]) ;; +esac + +m4_ifdef([_AS_DETECT_SUGGESTED], +[_AS_DETECT_SUGGESTED([ + test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( + ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO + ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO + PATH=/empty FPATH=/empty; export PATH FPATH + test "X`printf %s $ECHO`" = "X$ECHO" \ + || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) + +_LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) +_LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) +])# _LT_PROG_ECHO_BACKSLASH + + +# _LT_WITH_SYSROOT +# ---------------- +AC_DEFUN([_LT_WITH_SYSROOT], +[m4_require([_LT_DECL_SED])dnl +AC_MSG_CHECKING([for sysroot]) +AC_ARG_WITH([sysroot], +[AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@], + [Search for dependent 
libraries within DIR (or the compiler's sysroot + if not specified).])], +[], [with_sysroot=no]) + +dnl lt_sysroot will always be passed unquoted. We quote it here +dnl in case the user passed a directory name. +lt_sysroot= +case $with_sysroot in #( + yes) + if test yes = "$GCC"; then + lt_sysroot=`$CC --print-sysroot 2>/dev/null` + fi + ;; #( + /*) + lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"` + ;; #( + no|'') + ;; #( + *) + AC_MSG_RESULT([$with_sysroot]) + AC_MSG_ERROR([The sysroot must be an absolute path.]) + ;; +esac + + AC_MSG_RESULT([${lt_sysroot:-no}]) +_LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl +[dependent libraries, and where our libraries should be installed.])]) + +# _LT_ENABLE_LOCK +# --------------- +m4_defun([_LT_ENABLE_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock], + [avoid locking (might break parallel builds)])]) +test no = "$enable_libtool_lock" || enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out what ABI is being produced by ac_compile, and set mode + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE=32 + ;; + *ELF-64*) + HPUX_IA64_MODE=64 + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. 
+ echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test yes = "$lt_cv_prog_gnu_ld"; then + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +mips64*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + emul=elf + case `$FILECMD conftest.$ac_objext` in + *32-bit*) + emul="${emul}32" + ;; + *64-bit*) + emul="${emul}64" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *MSB*) + emul="${emul}btsmip" + ;; + *LSB*) + emul="${emul}ltsmip" + ;; + esac + case `$FILECMD conftest.$ac_objext` in + *N32*) + emul="${emul}n32" + ;; + esac + LD="${LD-ld} -m $emul" + fi + rm -rf conftest* + ;; + +x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \ +s390*-*linux*|s390*-*tpf*|sparc*-*linux*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. Note that the listed cases only cover the + # situations where additional linker options are needed (such as when + # doing 32-bit compilation for a host where ld defaults to 64-bit, or + # vice versa); the common cases where no linker options are needed do + # not appear in the list. 
+ echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.o` in + *32-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; + x86_64-*linux*) + case `$FILECMD conftest.o` in + *x86-64*) + LD="${LD-ld} -m elf32_x86_64" + ;; + *) + LD="${LD-ld} -m elf_i386" + ;; + esac + ;; + powerpc64le-*linux*) + LD="${LD-ld} -m elf32lppclinux" + ;; + powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + powerpcle-*linux*) + LD="${LD-ld} -m elf64lppc" + ;; + powerpc-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*|s390*-*tpf*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test yes != "$lt_cv_cc_needs_belf"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS=$SAVE_CFLAGS + fi + ;; +*-*solaris*) + # Find out what ABI is being produced by ac_compile, and set linker + # options accordingly. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `$FILECMD conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) + case $host in + i?86-*-solaris*|x86_64-*-solaris*) + LD="${LD-ld} -m elf_x86_64" + ;; + sparc*-*-solaris*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + # GNU ld 2.21 introduced _sol2 emulations. Use them if available. 
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then + LD=${LD-ld}_sol2 + fi + ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; +esac + +need_locks=$enable_libtool_lock +])# _LT_ENABLE_LOCK + + +# _LT_PROG_AR +# ----------- +m4_defun([_LT_PROG_AR], +[AC_CHECK_TOOLS(AR, [ar], false) +: ${AR=ar} +_LT_DECL([], [AR], [1], [The archiver]) + +# Use ARFLAGS variable as AR's operation code to sync the variable naming with +# Automake. If both AR_FLAGS and ARFLAGS are specified, AR_FLAGS should have +# higher priority because thats what people were doing historically (setting +# ARFLAGS for automake and AR_FLAGS for libtool). FIXME: Make the AR_FLAGS +# variable obsoleted/removed. + +test ${AR_FLAGS+y} || AR_FLAGS=${ARFLAGS-cr} +lt_ar_flags=$AR_FLAGS +_LT_DECL([], [lt_ar_flags], [0], [Flags to create an archive (by configure)]) + +# Make AR_FLAGS overridable by 'make ARFLAGS='. Don't try to run-time override +# by AR_FLAGS because that was never working and AR_FLAGS is about to die. +_LT_DECL([], [AR_FLAGS], [\@S|@{ARFLAGS-"\@S|@lt_ar_flags"}], + [Flags to create an archive]) + +AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file], + [lt_cv_ar_at_file=no + AC_COMPILE_IFELSE([AC_LANG_PROGRAM], + [echo conftest.$ac_objext > conftest.lst + lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD' + AC_TRY_EVAL([lt_ar_try]) + if test 0 -eq "$ac_status"; then + # Ensure the archiver fails upon bogus file names. 
+ rm -f conftest.$ac_objext libconftest.a + AC_TRY_EVAL([lt_ar_try]) + if test 0 -ne "$ac_status"; then + lt_cv_ar_at_file=@ + fi + fi + rm -f conftest.* libconftest.a + ]) + ]) + +if test no = "$lt_cv_ar_at_file"; then + archiver_list_spec= +else + archiver_list_spec=$lt_cv_ar_at_file +fi +_LT_DECL([], [archiver_list_spec], [1], + [How to feed a file listing to the archiver]) +])# _LT_PROG_AR + + +# _LT_CMD_OLD_ARCHIVE +# ------------------- +m4_defun([_LT_CMD_OLD_ARCHIVE], +[_LT_PROG_AR + +AC_CHECK_TOOL(STRIP, strip, :) +test -z "$STRIP" && STRIP=: +_LT_DECL([], [STRIP], [1], [A symbol stripping program]) + +AC_CHECK_TOOL(RANLIB, ranlib, :) +test -z "$RANLIB" && RANLIB=: +_LT_DECL([], [RANLIB], [1], + [Commands used to install an old-style archive]) + +# Determine commands to create old-style static archives. +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + bitrig* | openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" +fi + +case $host_os in + darwin*) + lock_old_archive_extraction=yes ;; + *) + lock_old_archive_extraction=no ;; +esac +_LT_DECL([], [old_postinstall_cmds], [2]) +_LT_DECL([], [old_postuninstall_cmds], [2]) +_LT_TAGDECL([], [old_archive_cmds], [2], + [Commands used to build an old-style archive]) +_LT_DECL([], [lock_old_archive_extraction], [0], + [Whether to use a lock for old archive extraction]) +])# _LT_CMD_OLD_ARCHIVE + + +# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([_LT_COMPILER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl 
+AC_CACHE_CHECK([$1], [$2], + [$2=no + m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" ## exclude from sc_useless_quotes_in_assignment + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? + cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $RM conftest* +]) + +if test yes = "[$]$2"; then + m4_if([$5], , :, [$5]) +else + m4_if([$6], , :, [$6]) +fi +])# _LT_COMPILER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], []) + + +# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------- +# Check whether the given linker option works +AC_DEFUN([_LT_LINKER_OPTION], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_SED])dnl +AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS $3" + echo "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. 
+ cat conftest.err 1>&AS_MESSAGE_LOG_FD + $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $RM -r conftest* + LDFLAGS=$save_LDFLAGS +]) + +if test yes = "[$]$2"; then + m4_if([$4], , :, [$4]) +else + m4_if([$5], , :, [$5]) +fi +])# _LT_LINKER_OPTION + +# Old name: +AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], []) + + +# LT_CMD_MAX_LEN +#--------------- +AC_DEFUN([LT_CMD_MAX_LEN], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring=ABCD + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw* | cegcc*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). 
+ lt_cv_sys_max_cmd_len=8192; + ;; + + mint*) + # On MiNT this can take a long time and run out of memory. + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*) + # This has been around since 386BSD, at least. Likely further. + if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + os2*) + # The test takes a long time on OS/2. + lt_cv_sys_max_cmd_len=8192 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. 
+ lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len" && \ + test undefined != "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + # Make teststring a little bigger before we do anything with it. + # a 1K string should be a reasonable start. + for i in 1 2 3 4 5 6 7 8; do + teststring=$teststring$teststring + done + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + while { test X`env echo "$teststring$teststring" 2>/dev/null` \ + = "X$teststring$teststring"; } >/dev/null 2>&1 && + test 17 != "$i" # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + # Only check the string length outside the loop. + lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` + teststring= + # Add a significant safety factor because C++ compilers can tack on + # massive amounts of additional arguments before passing them to the + # linker. It appears as though 1/2 is a usable value. 
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi + ;; + esac +]) +if test -n "$lt_cv_sys_max_cmd_len"; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +max_cmd_len=$lt_cv_sys_max_cmd_len +_LT_DECL([], [max_cmd_len], [0], + [What is the maximum length of a command?]) +])# LT_CMD_MAX_LEN + +# Old name: +AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], []) + + +# _LT_HEADER_DLFCN +# ---------------- +m4_defun([_LT_HEADER_DLFCN], +[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl +])# _LT_HEADER_DLFCN + + +# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# ---------------------------------------------------------------- +m4_defun([_LT_TRY_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test yes = "$cross_compiling"; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext <<_LT_EOF +[#line $LINENO "configure" +#include "confdefs.h" + +#if HAVE_DLFCN_H +#include +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. */ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +/* When -fvisibility=hidden is used, assume the code has been annotated + correspondingly for the symbols needed. 
*/ +#if defined __GNUC__ && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) +int fnord () __attribute__((visibility("default"))); +#endif + +int fnord () { return 42; } +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else + { + if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + else puts (dlerror ()); + } + /* dlclose (self); */ + } + else + puts (dlerror ()); + + return status; +}] +_LT_EOF + if AC_TRY_EVAL(ac_link) && test -s "conftest$ac_exeext" 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_TRY_DLOPEN_SELF + + +# LT_SYS_DLOPEN_SELF +# ------------------ +AC_DEFUN([LT_SYS_DLOPEN_SELF], +[m4_require([_LT_HEADER_DLFCN])dnl +if test yes != "$enable_dlopen"; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen=load_add_on + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32* | cegcc*) + lt_cv_dlopen=LoadLibrary + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl],[ + lt_cv_dlopen=dyld + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + tpf*) + # Don't try to run any link tests for TPF. We know it's impossible + # because TPF is a cross-compiler, and we know how we open DSOs. 
+ lt_cv_dlopen=dlopen + lt_cv_dlopen_libs= + lt_cv_dlopen_self=no + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen=shl_load], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen=shl_load lt_cv_dlopen_libs=-ldld], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen=dlopen], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-ldl], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen=dlopen lt_cv_dlopen_libs=-lsvld], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen=dld_link lt_cv_dlopen_libs=-ldld]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test no = "$lt_cv_dlopen"; then + enable_dlopen=no + else + enable_dlopen=yes + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS=$CPPFLAGS + test yes = "$ac_cv_header_dlfcn_h" && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS=$LDFLAGS + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS=$LIBS + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test yes = "$lt_cv_dlopen_self"; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS=$save_CPPFLAGS + LDFLAGS=$save_LDFLAGS + LIBS=$save_LIBS + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +_LT_DECL([dlopen_support], [enable_dlopen], [0], + [Whether dlopen is supported]) 
+_LT_DECL([dlopen_self], [enable_dlopen_self], [0], + [Whether dlopen of programs is supported]) +_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0], + [Whether dlopen of statically linked programs is supported]) +])# LT_SYS_DLOPEN_SELF + +# Old name: +AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], []) + + +# _LT_COMPILER_C_O([TAGNAME]) +# --------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler. +# This macro does not hard code the compiler like AC_PROG_CC_C_O. +m4_defun([_LT_COMPILER_C_O], +[m4_require([_LT_DECL_SED])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $RM -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:$LINENO: \$? 
= $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 2>&AS_MESSAGE_LOG_FD + $RM conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files + $RM out/* && rmdir out + cd .. + $RM -r conftest + $RM conftest* +]) +_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1], + [Does compiler simultaneously support -c and -o options?]) +])# _LT_COMPILER_C_O + + +# _LT_COMPILER_FILE_LOCKS([TAGNAME]) +# ---------------------------------- +# Check to see if we can do hard links to lock some files if needed +m4_defun([_LT_COMPILER_FILE_LOCKS], +[m4_require([_LT_ENABLE_LOCK])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +_LT_COMPILER_C_O([$1]) + +hard_links=nottested +if test no = "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" && test no != "$need_locks"; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $RM conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test no = "$hard_links"; then + AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?]) +])# _LT_COMPILER_FILE_LOCKS + + +# _LT_CHECK_OBJDIR +# ---------------- 
+m4_defun([_LT_CHECK_OBJDIR], +[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], +[rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. + lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null]) +objdir=$lt_cv_objdir +_LT_DECL([], [objdir], [0], + [The name of the directory that contains temporary libtool files])dnl +m4_pattern_allow([LT_OBJDIR])dnl +AC_DEFINE_UNQUOTED([LT_OBJDIR], "$lt_cv_objdir/", + [Define to the sub-directory where libtool stores uninstalled libraries.]) +])# _LT_CHECK_OBJDIR + + +# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME]) +# -------------------------------------- +# Check hardcoding attributes. +m4_defun([_LT_LINKER_HARDCODE_LIBPATH], +[AC_MSG_CHECKING([how to hardcode library paths into programs]) +_LT_TAGVAR(hardcode_action, $1)= +if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" || + test -n "$_LT_TAGVAR(runpath_var, $1)" || + test yes = "$_LT_TAGVAR(hardcode_automatic, $1)"; then + + # We can hardcode non-existent directories. + if test no != "$_LT_TAGVAR(hardcode_direct, $1)" && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test no != "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" && + test no != "$_LT_TAGVAR(hardcode_minus_L, $1)"; then + # Linking always hardcodes the temporary library directory. + _LT_TAGVAR(hardcode_action, $1)=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + _LT_TAGVAR(hardcode_action, $1)=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. 
+ _LT_TAGVAR(hardcode_action, $1)=unsupported +fi +AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)]) + +if test relink = "$_LT_TAGVAR(hardcode_action, $1)" || + test yes = "$_LT_TAGVAR(inherit_rpath, $1)"; then + # Fast installation is not supported + enable_fast_install=no +elif test yes = "$shlibpath_overrides_runpath" || + test no = "$enable_shared"; then + # Fast installation is not necessary + enable_fast_install=needless +fi +_LT_TAGDECL([], [hardcode_action], [0], + [How to hardcode a shared library path into an executable]) +])# _LT_LINKER_HARDCODE_LIBPATH + + +# _LT_CMD_STRIPLIB +# ---------------- +m4_defun([_LT_CMD_STRIPLIB], +[m4_require([_LT_DECL_EGREP]) +striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -z "$STRIP"; then + AC_MSG_RESULT([no]) +else + if $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) + else + case $host_os in + darwin*) + # FIXME - insert some real tests, host_os isn't really good enough + striplib="$STRIP -x" + old_striplib="$STRIP -S" + AC_MSG_RESULT([yes]) + ;; + freebsd*) + if $STRIP -V 2>&1 | $GREP "elftoolchain" >/dev/null; then + old_striplib="$STRIP --strip-debug" + striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac + fi +fi +_LT_DECL([], [old_striplib], [1], [Commands to strip libraries]) +_LT_DECL([], [striplib], [1]) +])# _LT_CMD_STRIPLIB + + +# _LT_PREPARE_MUNGE_PATH_LIST +# --------------------------- +# Make sure func_munge_path_list() is defined correctly. 
+m4_defun([_LT_PREPARE_MUNGE_PATH_LIST], +[[# func_munge_path_list VARIABLE PATH +# ----------------------------------- +# VARIABLE is name of variable containing _space_ separated list of +# directories to be munged by the contents of PATH, which is string +# having a format: +# "DIR[:DIR]:" +# string "DIR[ DIR]" will be prepended to VARIABLE +# ":DIR[:DIR]" +# string "DIR[ DIR]" will be appended to VARIABLE +# "DIRP[:DIRP]::[DIRA:]DIRA" +# string "DIRP[ DIRP]" will be prepended to VARIABLE and string +# "DIRA[ DIRA]" will be appended to VARIABLE +# "DIR[:DIR]" +# VARIABLE will be replaced by "DIR[ DIR]" +func_munge_path_list () +{ + case x@S|@2 in + x) + ;; + *:) + eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'` \@S|@@S|@1\" + ;; + x:*) + eval @S|@1=\"\@S|@@S|@1 `$ECHO @S|@2 | $SED 's/:/ /g'`\" + ;; + *::*) + eval @S|@1=\"\@S|@@S|@1\ `$ECHO @S|@2 | $SED -e 's/.*:://' -e 's/:/ /g'`\" + eval @S|@1=\"`$ECHO @S|@2 | $SED -e 's/::.*//' -e 's/:/ /g'`\ \@S|@@S|@1\" + ;; + *) + eval @S|@1=\"`$ECHO @S|@2 | $SED 's/:/ /g'`\" + ;; + esac +} +]])# _LT_PREPARE_PATH_LIST + + +# _LT_SYS_DYNAMIC_LINKER([TAG]) +# ----------------------------- +# PORTME Fill in your ld.so characteristics +m4_defun([_LT_SYS_DYNAMIC_LINKER], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_OBJDUMP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CHECK_SHELL_FEATURES])dnl +m4_require([_LT_PREPARE_MUNGE_PATH_LIST])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) +m4_if([$1], + [], [ +if test yes = "$GCC"; then + case $host_os in + darwin*) lt_awk_arg='/^libraries:/,/LR/' ;; + *) lt_awk_arg='/^libraries:/' ;; + esac + case $host_os in + mingw* | cegcc*) lt_sed_strip_eq='s|=\([[A-Za-z]]:\)|\1|g' ;; + *) lt_sed_strip_eq='s|=/|/|g' ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` + case $lt_search_path_spec in + *\;*) + # if the path contains 
";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` + ;; + *) + lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` + ;; + esac + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary... + lt_tmp_lt_search_path_spec= + lt_multi_os_dir=/`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + # ...but if some path component already ends with the multilib dir we assume + # that all is fine and trust -print-search-dirs as is (GCC 4.2? or newer). + case "$lt_multi_os_dir; $lt_search_path_spec " in + "/; "* | "/.; "* | "/./; "* | *"$lt_multi_os_dir "* | *"$lt_multi_os_dir/ "*) + lt_multi_os_dir= + ;; + esac + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path$lt_multi_os_dir" + elif test -n "$lt_multi_os_dir"; then + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' +BEGIN {RS = " "; FS = "/|\n";} { + lt_foo = ""; + lt_count = 0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo = "/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + # AWK program above erroneously prepends '/' to C:/dos/paths + # for these hosts. 
+ case $host_os in + mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ + $SED 's|/\([[A-Za-z]]:\)|\1|g'` ;; + esac + sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=.so +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +AC_ARG_VAR([LT_SYS_LIBRARY_PATH], +[User-defined run-time library search path.]) + +case $host_os in +aix3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='$libname$release$shared_ext$major' + ;; + +aix[[4-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test ia64 = "$host_cpu"; then + # AIX 5 supports IA64 + library_names_spec='$libname$release$shared_ext$major $libname$release$shared_ext$versuffix $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line '#! .'. This would cause the generated library to + # depend on '.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[[01]] | aix4.[[01]].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | $CC -E - | $GREP yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # Using Import Files as archive members, it is possible to support + # filename-based versioning of shared library archives on AIX. While + # this would work for both with and without runtime linking, it will + # prevent static linking of such archives. So we do filename-based + # shared library versioning with .so extension only, which is used + # when both runtime linking and shared linking is enabled. + # Unfortunately, runtime linking may impact performance, so we do + # not want this to be the default eventually. Also, we use the + # versioned .so libs for executables only if there is the -brtl + # linker flag in LDFLAGS as well, or --with-aix-soname=svr4 only. + # To allow for filename-based versioning support, we need to create + # libNAME.so.V as an archive file, containing: + # *) an Import File, referring to the versioned filename of the + # archive as well as the shared archive member, telling the + # bitwidth (32 or 64) of that shared object, and providing the + # list of exported symbols of that shared object, eventually + # decorated with the 'weak' keyword + # *) the shared object with the F_LOADONLY flag set, to really avoid + # it being seen by the linker. + # At run time we better use the real file rather than another symlink, + # but for link time we create the symlink libNAME.so -> libNAME.so.V + + case $with_aix_soname,$aix_use_runtimelinking in + # AIX (on Power*) has no versioning support, so currently we cannot hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. 
+ aix,yes) # traditional libtool + dynamic_linker='AIX unversionable lib.so' + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + aix,no) # traditional AIX only + dynamic_linker='AIX lib.a[(]lib.so.V[)]' + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + ;; + svr4,*) # full svr4 only + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # We do not specify a path in Import Files, so LIBPATH fires. + shlibpath_overrides_runpath=yes + ;; + *,yes) # both, prefer svr4 + dynamic_linker="AIX lib.so.V[(]$shared_archive_member_spec.o[)], lib.a[(]lib.so.V[)]" + library_names_spec='$libname$release$shared_ext$major $libname$shared_ext' + # unpreferred sharedlib libNAME.a needs extra handling + postinstall_cmds='test -n "$linkname" || linkname="$realname"~func_stripname "" ".so" "$linkname"~$install_shared_prog "$dir/$func_stripname_result.$libext" "$destdir/$func_stripname_result.$libext"~test -z "$tstripme" || test -z "$striplib" || $striplib "$destdir/$func_stripname_result.$libext"' + postuninstall_cmds='for n in $library_names $old_library; do :; done~func_stripname "" ".so" "$n"~test "$func_stripname_result" = "$n" || func_append rmfiles " $odir/$func_stripname_result.$libext"' + # We do not specify a path in Import Files, so LIBPATH fires. 
+ shlibpath_overrides_runpath=yes + ;; + *,no) # both, prefer aix + dynamic_linker="AIX lib.a[(]lib.so.V[)], lib.so.V[(]$shared_archive_member_spec.o[)]" + library_names_spec='$libname$release.a $libname.a' + soname_spec='$libname$release$shared_ext$major' + # unpreferred sharedlib libNAME.so.V and symlink libNAME.so need extra handling + postinstall_cmds='test -z "$dlname" || $install_shared_prog $dir/$dlname $destdir/$dlname~test -z "$tstripme" || test -z "$striplib" || $striplib $destdir/$dlname~test -n "$linkname" || linkname=$realname~func_stripname "" ".a" "$linkname"~(cd "$destdir" && $LN_S -f $dlname $func_stripname_result.so)' + postuninstall_cmds='test -z "$dlname" || func_append rmfiles " $odir/$dlname"~for n in $old_library $library_names; do :; done~func_stripname "" ".a" "$n"~func_append rmfiles " $odir/$func_stripname_result.so"' + ;; + esac + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + case $host_cpu in + powerpc) + # Since July 2007 AmigaOS4 officially supports .so libraries. + # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + ;; + m68k) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. 
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + esac + ;; + +beos*) + library_names_spec='$libname$shared_ext' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32* | cegcc*) + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + + case $GCC,$cc_basename in + yes,*) + # gcc + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) + ;; + mingw* | cegcc*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + ;; + esac + dynamic_linker='Win32 ld.exe' + ;; + + *,cl* | *,icl*) + # Native MSVC or ICC + libname_spec='$name' + soname_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext' + library_names_spec='$libname.dll.lib' + + case $build_os in + mingw*) + sys_lib_search_path_spec= + lt_save_ifs=$IFS + IFS=';' + for lt_path in $LIB + do + IFS=$lt_save_ifs + # Let DOS variable expansion print the short 8.3 style file name. + lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` + sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" + done + IFS=$lt_save_ifs + # Convert to MSYS style. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` + ;; + cygwin*) + # Convert to unix form, then to dos form, then back to unix form + # but this time dos style (no spaces!) so that the unix form looks + # like /cygdrive/c/PROGRA~1:/cygdr... 
+ sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` + sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` + sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + ;; + *) + sys_lib_search_path_spec=$LIB + if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH. + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + # FIXME: find the short name or the path components, as spaces are + # common. (e.g. "Program Files" -> "PROGRA~1") + ;; + esac + + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + shlibpath_overrides_runpath=yes + dynamic_linker='Win32 link.exe' + ;; + + *) + # Assume MSVC and ICC wrapper + library_names_spec='$libname`echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext $libname.lib' + dynamic_linker='Win32 ld.exe' + ;; + esac + # FIXME: first we should search . 
and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$major$shared_ext $libname$shared_ext' + soname_spec='$libname$release$major$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' +m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd* | dragonfly* | midnightbsd*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[[23]].*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2.*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[[01]]* | freebsdelf3.[[01]]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ + freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + *) # from 4.6 on, and DragonFly + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +haiku*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + dynamic_linker="$host_os runtime_loader" + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LIBRARY_PATH + shlibpath_overrides_runpath=no + sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. 
+ version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + if test 32 = "$HPUX_IA64_MODE"; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + sys_lib_dlsearch_path_spec=/usr/lib/hpux32 + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + sys_lib_dlsearch_path_spec=/usr/lib/hpux64 + fi + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
+ postinstall_cmds='chmod 555 $lib' + # or fails outright, so override atomically: + install_override_mode=555 + ;; + +interix[[3-9]]*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test yes = "$lt_cv_prog_gnu_ld"; then + version_type=linux # correct to gnu/linux during the next big refactor + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$release$shared_ext $libname$shared_ext' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib$libsuff /lib$libsuff /usr/local/lib$libsuff" + sys_lib_dlsearch_path_spec="/usr/lib$libsuff /lib$libsuff" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +linux*android*) + version_type=none # Android doesn't support versioned libraries. 
+ need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext' + soname_spec='$libname$release$shared_ext' + finish_cmds= + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + dynamic_linker='Android linker' + # Don't embed -rpath directories since the linker doesn't support them. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + ;; + +# This must be glibc/ELF. +linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + + # Some binutils ld are patched to set DT_RUNPATH + AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], + [lt_cv_shlibpath_overrides_runpath=no + save_LDFLAGS=$LDFLAGS + save_libdir=$libdir + eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ + LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], + [lt_cv_shlibpath_overrides_runpath=yes])]) + LDFLAGS=$save_LDFLAGS + libdir=$save_libdir + ]) + shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath + + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Ideally, we could use ldconfig to report *all* directores which are + # searched for libraries, however this is still not possible. 
Aside from not + # being certain /sbin/ldconfig is available, command + # 'ldconfig -N -X -v | grep ^/' on 64bit Fedora does not report /usr/lib64, + # even though it is searched at run-time. Try to do the best guess by + # appending ld.so.conf contents (and includes) to the search path. + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
+ dynamic_linker='GNU/Linux ld.so' + ;; + +netbsdelf*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='NetBSD ld.elf_so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +*nto* | *qnx*) + version_type=qnx + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='ldqnx.so' + ;; + +openbsd* | bitrig*) + version_type=sunos + sys_lib_dlsearch_path_spec=/usr/lib + need_lib_prefix=no + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + need_version=no + else + need_version=yes + fi + library_names_spec='$libname$release$shared_ext$versuffix 
$libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +os2*) + libname_spec='$name' + version_type=windows + shrext_cmds=.dll + need_version=no + need_lib_prefix=no + # OS/2 can only load a DLL with a base name of 8 characters or less. + soname_spec='`test -n "$os2dllname" && libname="$os2dllname"; + v=$($ECHO $release$versuffix | tr -d .-); + n=$($ECHO $libname | cut -b -$((8 - ${#v})) | tr . _); + $ECHO $n$v`$shared_ext' + library_names_spec='${libname}_dll.$libext' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=BEGINLIBPATH + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + postinstall_cmds='base_file=`basename \$file`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\$base_file'\''i; $ECHO \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname~ + if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then + eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; + fi' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; $ECHO \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $RM \$dlpath' + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='$libname$release$shared_ext$major' + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + +rdos*) + dynamic_linker=no + ;; + +solaris*) + version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='$libname$release$shared_ext$versuffix $libname$shared_ext$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test yes = "$with_gnu_ld"; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec; 
then + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$shared_ext.$versuffix $libname$shared_ext.$major $libname$shared_ext' + soname_spec='$libname$shared_ext.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=sco + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + if test yes = "$with_gnu_ld"; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + case $host_os in + sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +tpf*) + # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
+ version_type=linux # correct to gnu/linux during the next big refactor + need_lib_prefix=no + need_version=no + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +uts4*) + version_type=linux # correct to gnu/linux during the next big refactor + library_names_spec='$libname$release$shared_ext$versuffix $libname$release$shared_ext$major $libname$shared_ext' + soname_spec='$libname$release$shared_ext$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test no = "$dynamic_linker" && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test yes = "$GCC"; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + +if test set = "${lt_cv_sys_lib_search_path_spec+set}"; then + sys_lib_search_path_spec=$lt_cv_sys_lib_search_path_spec +fi + +if test set = "${lt_cv_sys_lib_dlsearch_path_spec+set}"; then + sys_lib_dlsearch_path_spec=$lt_cv_sys_lib_dlsearch_path_spec +fi + +# remember unaugmented sys_lib_dlsearch_path content for libtool script decls... +configure_time_dlsearch_path=$sys_lib_dlsearch_path_spec + +# ... 
but it needs LT_SYS_LIBRARY_PATH munging for other configure-time code +func_munge_path_list sys_lib_dlsearch_path_spec "$LT_SYS_LIBRARY_PATH" + +# to be used as default LT_SYS_LIBRARY_PATH value in generated libtool +configure_time_lt_sys_library_path=$LT_SYS_LIBRARY_PATH + +_LT_DECL([], [variables_saved_for_relink], [1], + [Variables whose values should be saved in libtool wrapper scripts and + restored at link time]) +_LT_DECL([], [need_lib_prefix], [0], + [Do we need the "lib" prefix for modules?]) +_LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) +_LT_DECL([], [version_type], [0], [Library versioning type]) +_LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) +_LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) +_LT_DECL([], [shlibpath_overrides_runpath], [0], + [Is shlibpath searched before the hard-coded library search path?]) +_LT_DECL([], [libname_spec], [1], [Format of library name prefix]) +_LT_DECL([], [library_names_spec], [1], + [[List of archive names. First name is the real one, the rest are links. 
+ The last name is the one that the linker finds with -lNAME]]) +_LT_DECL([], [soname_spec], [1], + [[The coded name of the library, if different from the real name]]) +_LT_DECL([], [install_override_mode], [1], + [Permission mode override for installation of shared libraries]) +_LT_DECL([], [postinstall_cmds], [2], + [Command to use after installation of a shared archive]) +_LT_DECL([], [postuninstall_cmds], [2], + [Command to use after uninstallation of a shared archive]) +_LT_DECL([], [finish_cmds], [2], + [Commands used to finish a libtool library installation in a directory]) +_LT_DECL([], [finish_eval], [1], + [[As "finish_cmds", except a single script fragment to be evaled but + not shown]]) +_LT_DECL([], [hardcode_into_libs], [0], + [Whether we should hardcode library paths into libraries]) +_LT_DECL([], [sys_lib_search_path_spec], [2], + [Compile-time system search path for libraries]) +_LT_DECL([sys_lib_dlsearch_path_spec], [configure_time_dlsearch_path], [2], + [Detected run-time system search path for libraries]) +_LT_DECL([], [configure_time_lt_sys_library_path], [2], + [Explicit LT_SYS_LIBRARY_PATH set during ./configure time]) +])# _LT_SYS_DYNAMIC_LINKER + + +# _LT_PATH_TOOL_PREFIX(TOOL) +# -------------------------- +# find a file program that can recognize shared library +AC_DEFUN([_LT_PATH_TOOL_PREFIX], +[m4_require([_LT_DECL_EGREP])dnl +AC_MSG_CHECKING([for $1]) +AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, +[case $MAGIC_CMD in +[[\\/*] | ?:[\\/]*]) + lt_cv_path_MAGIC_CMD=$MAGIC_CMD # Let the user override the test with a path. + ;; +*) + lt_save_MAGIC_CMD=$MAGIC_CMD + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR +dnl $ac_dummy forces splitting on constant user-supplied paths. +dnl POSIX.2 word splitting is done only on the output of word expansions, +dnl not every word. This closes a longstanding sh security hole. + ac_dummy="m4_if([$2], , $PATH, [$2])" + for ac_dir in $ac_dummy; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. 
+ if test -f "$ac_dir/$1"; then + lt_cv_path_MAGIC_CMD=$ac_dir/"$1" + if test -n "$file_magic_test_file"; then + case $deplibs_check_method in + "file_magic "*) + file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` + MAGIC_CMD=$lt_cv_path_MAGIC_CMD + if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | + $EGREP "$file_magic_regex" > /dev/null; then + : + else + cat <<_LT_EOF 1>&2 + +*** Warning: the command libtool uses to detect shared libraries, +*** $file_magic_cmd, produces output that libtool cannot recognize. +*** The result is that libtool may fail to recognize shared libraries +*** as such. This will affect the creation of libtool libraries that +*** depend on shared libraries, but programs linked with such libtool +*** libraries will work regardless of this problem. Nevertheless, you +*** may want to report the problem to your system manager and/or to +*** bug-libtool@gnu.org + +_LT_EOF + fi ;; + esac + fi + break + fi + done + IFS=$lt_save_ifs + MAGIC_CMD=$lt_save_MAGIC_CMD + ;; +esac]) +MAGIC_CMD=$lt_cv_path_MAGIC_CMD +if test -n "$MAGIC_CMD"; then + AC_MSG_RESULT($MAGIC_CMD) +else + AC_MSG_RESULT(no) +fi +_LT_DECL([], [MAGIC_CMD], [0], + [Used to examine libraries when file_magic_cmd begins with "file"])dnl +])# _LT_PATH_TOOL_PREFIX + +# Old name: +AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], []) + + +# _LT_PATH_MAGIC +# -------------- +# find a file program that can recognize a shared library +m4_defun([_LT_PATH_MAGIC], +[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) +if test -z "$lt_cv_path_MAGIC_CMD"; then + if test -n "$ac_tool_prefix"; then + _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) + else + MAGIC_CMD=: + fi +fi +])# _LT_PATH_MAGIC + + +# LT_PATH_LD +# ---------- +# find the pathname to the GNU or non-GNU linker +AC_DEFUN([LT_PATH_LD], +[AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PROG_ECHO_BACKSLASH])dnl + +AC_ARG_WITH([gnu-ld], + [AS_HELP_STRING([--with-gnu-ld], + [assume the C compiler uses GNU ld @<:@default=no@:>@])], + [test no = "$withval" || with_gnu_ld=yes], + [with_gnu_ld=no])dnl + +ac_prog=ld +if test yes = "$GCC"; then + # Check if gcc -print-prog-name=ld gives a path. + AC_MSG_CHECKING([for ld used by $CC]) + case $host in + *-*-mingw*) + # gcc leaves a trailing carriage return, which upsets mingw + ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; + *) + ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; + esac + case $ac_prog in + # Accept absolute paths. + [[\\/]]* | ?:[[\\/]]*) + re_direlt='/[[^/]][[^/]]*/\.\./' + # Canonicalize the pathname of ld + ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` + while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do + ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` + done + test -z "$LD" && LD=$ac_prog + ;; + "") + # If it fails, then pretend we aren't using GCC. + ac_prog=ld + ;; + *) + # If it is relative, then search for the first ld in PATH. + with_gnu_ld=unknown + ;; + esac +elif test yes = "$with_gnu_ld"; then + AC_MSG_CHECKING([for GNU ld]) +else + AC_MSG_CHECKING([for non-GNU ld]) +fi +AC_CACHE_VAL(lt_cv_path_LD, +[if test -z "$LD"; then + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then + lt_cv_path_LD=$ac_dir/$ac_prog + # Check to see if the program is GNU ld. I'd rather use --version, + # but apparently some variants of GNU ld only accept -v. + # Break only if it was the GNU/non-GNU ld that we prefer. 
+ case `"$lt_cv_path_LD" -v 2>&1 &1 conftest.i +cat conftest.i conftest.i >conftest2.i +: ${lt_DD:=$DD} +AC_PATH_PROGS_FEATURE_CHECK([lt_DD], [dd], +[if "$ac_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && ac_cv_path_lt_DD="$ac_path_lt_DD" ac_path_lt_DD_found=: +fi]) +rm -f conftest.i conftest2.i conftest.out]) +])# _LT_PATH_DD + + +# _LT_CMD_TRUNCATE +# ---------------- +# find command to truncate a binary pipe +m4_defun([_LT_CMD_TRUNCATE], +[m4_require([_LT_PATH_DD]) +AC_CACHE_CHECK([how to truncate binary pipes], [lt_cv_truncate_bin], +[printf 0123456789abcdef0123456789abcdef >conftest.i +cat conftest.i conftest.i >conftest2.i +lt_cv_truncate_bin= +if "$ac_cv_path_lt_DD" bs=32 count=1 conftest.out 2>/dev/null; then + cmp -s conftest.i conftest.out \ + && lt_cv_truncate_bin="$ac_cv_path_lt_DD bs=4096 count=1" +fi +rm -f conftest.i conftest2.i conftest.out +test -z "$lt_cv_truncate_bin" && lt_cv_truncate_bin="$SED -e 4q"]) +_LT_DECL([lt_truncate_bin], [lt_cv_truncate_bin], [1], + [Command to truncate a binary pipe]) +])# _LT_CMD_TRUNCATE + + +# _LT_CHECK_MAGIC_METHOD +# ---------------------- +# how to check for library dependencies +# -- PORTME fill in with the dynamic library characteristics +m4_defun([_LT_CHECK_MAGIC_METHOD], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +AC_CACHE_CHECK([how to recognize dependent libraries], +lt_cv_deplibs_check_method, +[lt_cv_file_magic_cmd='$MAGIC_CMD' +lt_cv_file_magic_test_file= +lt_cv_deplibs_check_method='unknown' +# Need to set the preceding variable on all platforms that support +# interlibrary dependencies. +# 'none' -- dependencies not supported. +# 'unknown' -- same as none, but documents that we really don't know. +# 'pass_all' -- all dependencies passed with no checks. +# 'test_compile' -- check by making test program. 
+# 'file_magic [[regex]]' -- check by looking for files in library path +# that responds to the $file_magic_cmd with a given extended regex. +# If you have 'file' or equivalent on your system and you're not sure +# whether 'pass_all' will *always* work, you probably want this one. + +case $host_os in +aix[[4-9]]*) + lt_cv_deplibs_check_method=pass_all + ;; + +beos*) + lt_cv_deplibs_check_method=pass_all + ;; + +bsdi[[45]]*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)' + lt_cv_file_magic_cmd='$FILECMD -L' + lt_cv_file_magic_test_file=/shlib/libc.so + ;; + +cygwin*) + # func_win32_libid is a shell function defined in ltmain.sh + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + ;; + +mingw* | pw32*) + # Base MSYS/MinGW do not provide the 'file' command needed by + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + # Keep this pattern in sync with the one in func_win32_libid. + lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi + ;; + +cegcc*) + # use the weaker test based on 'objdump'. See mingw*. + lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + ;; + +darwin* | rhapsody*) + lt_cv_deplibs_check_method=pass_all + ;; + +freebsd* | dragonfly* | midnightbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + case $host_cpu in + i*86 ) + # Not sure whether the presence of OpenBSD here was a mistake. + # Let's accept both of them until this is cleared up. 
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +haiku*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=$FILECMD + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix[[3-9]]*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be glibc/ELF. 
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +# The dynamic-linker case in this file spells this host newsos6, so accept +# that spelling here as well. The historical newos6 pattern is kept so that +# anything it matched before still matches. +newos6* | newsos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=$FILECMD + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +*nto* | *qnx*) + lt_cv_deplibs_check_method=pass_all + ;; + +openbsd* | bitrig*) + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +tpf*) + lt_cv_deplibs_check_method=pass_all 
+ ;; +os2*) + lt_cv_deplibs_check_method=pass_all + ;; +esac +]) + +file_magic_glob= +want_nocaseglob=no +if test "$build" = "$host"; then + case $host_os in + mingw* | pw32*) + if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then + want_nocaseglob=yes + else + file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"` + fi + ;; + esac +fi + +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown + +_LT_DECL([], [deplibs_check_method], [1], + [Method to check whether dependent libraries are shared objects]) +_LT_DECL([], [file_magic_cmd], [1], + [Command to use when deplibs_check_method = "file_magic"]) +_LT_DECL([], [file_magic_glob], [1], + [How to find potential files when deplibs_check_method = "file_magic"]) +_LT_DECL([], [want_nocaseglob], [1], + [Find potential files using nocaseglob when deplibs_check_method = "file_magic"]) +])# _LT_CHECK_MAGIC_METHOD + + +# LT_PATH_NM +# ---------- +# find the pathname to a BSD- or MS-compatible name lister +AC_DEFUN([LT_PATH_NM], +[AC_REQUIRE([AC_PROG_CC])dnl +AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM, +[if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM=$NM +else + lt_nm_to_check=${ac_tool_prefix}nm + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs=$IFS; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS=$lt_save_ifs + test -z "$ac_dir" && ac_dir=. + tmp_nm=$ac_dir/$lt_tmp_nm + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext"; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the 'sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + # MSYS converts /dev/null to NUL, MinGW nm treats NUL as empty + case $build_os in + mingw*) lt_bad_file=conftest.nm/nofile ;; + *) lt_bad_file=/dev/null ;; + esac + case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in + *$lt_bad_file* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break 2 + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break 2 + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS=$lt_save_ifs + done + : ${lt_cv_path_NM=no} +fi]) +if test no != "$lt_cv_path_NM"; then + NM=$lt_cv_path_NM +else + # Didn't find any BSD compatible name lister, look for dumpbin. + if test -n "$DUMPBIN"; then : + # Let the user override the test. 
+ else + AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :) + case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in + *COFF*) + DUMPBIN="$DUMPBIN -symbols -headers" + ;; + *) + DUMPBIN=: + ;; + esac + fi + AC_SUBST([DUMPBIN]) + if test : != "$DUMPBIN"; then + NM=$DUMPBIN + fi +fi +test -z "$NM" && NM=nm +AC_SUBST([NM]) +_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl + +AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface], + [lt_cv_nm_interface="BSD nm" + echo "int some_variable = 0;" > conftest.$ac_ext + (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$ac_compile" 2>conftest.err) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD) + (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) + cat conftest.err >&AS_MESSAGE_LOG_FD + (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD) + cat conftest.out >&AS_MESSAGE_LOG_FD + if $GREP 'External.*some_variable' conftest.out > /dev/null; then + lt_cv_nm_interface="MS dumpbin" + fi + rm -f conftest*]) +])# LT_PATH_NM + +# Old names: +AU_ALIAS([AM_PROG_NM], [LT_PATH_NM]) +AU_ALIAS([AC_PROG_NM], [LT_PATH_NM]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_PROG_NM], []) +dnl AC_DEFUN([AC_PROG_NM], []) + +# _LT_CHECK_SHAREDLIB_FROM_LINKLIB +# -------------------------------- +# how to determine the name of the shared library +# associated with a specific link library. 
+# -- PORTME fill in with the dynamic library characteristics +# Result: sharedlib_from_linklib_cmd holds the command used to map a link library to its shared library. +m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB], +[m4_require([_LT_DECL_EGREP]) +m4_require([_LT_DECL_OBJDUMP]) +m4_require([_LT_DECL_DLLTOOL]) +AC_CACHE_CHECK([how to associate runtime and link libraries], +lt_cv_sharedlib_from_linklib_cmd, +[lt_cv_sharedlib_from_linklib_cmd='unknown' + +# Only the hosts matched by the first branch inspect the dlltool help text; every other host takes the default branch. +case $host_os in +cygwin* | mingw* | pw32* | cegcc*) + # two different shell functions defined in ltmain.sh; + # decide which one to use based on capabilities of $DLLTOOL + case `$DLLTOOL --help 2>&1` in + *--identify-strict*) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib + ;; + *) + lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback + ;; + esac + ;; +*) + # fallback: assume linklib IS sharedlib + lt_cv_sharedlib_from_linklib_cmd=$ECHO + ;; +esac +]) +# Copy the cached answer into the variable declared below; an empty value also falls back to $ECHO. +sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd +test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO + +_LT_DECL([], [sharedlib_from_linklib_cmd], [1], + [Command to associate shared and link libraries]) +])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB + + +# _LT_PATH_MANIFEST_TOOL +# ---------------------- +# locate the manifest tool +# MANIFEST_TOOL is set to the shell no-op : unless the probe result below is yes. +m4_defun([_LT_PATH_MANIFEST_TOOL], +[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :) +test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt +# NOTE(review): the cache variable is spelled mainfest throughout this macro; keep every occurrence in step if it is ever renamed. +AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool], + [lt_cv_path_mainfest_tool=no + # The probe command line and its stderr are copied to the message log; only its stdout is searched. + echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD + $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out + cat conftest.err >&AS_MESSAGE_LOG_FD + if $GREP 'Manifest Tool' conftest.out > /dev/null; then + lt_cv_path_mainfest_tool=yes + fi + rm -f conftest*]) +# A probe result other than yes replaces the tool with the shell no-op : +if test yes != "$lt_cv_path_mainfest_tool"; then + MANIFEST_TOOL=: +fi +_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl +])# _LT_PATH_MANIFEST_TOOL + + +# _LT_DLL_DEF_P([FILE]) +# --------------------- +# True iff FILE is a Windows DLL '.def' file.
+# Keep in sync with func_dll_def_p in the libtool script +AC_DEFUN([_LT_DLL_DEF_P], +[dnl + test DEF = "`$SED -n dnl + -e '\''s/^[[ ]]*//'\'' dnl Strip leading whitespace + -e '\''/^\(;.*\)*$/d'\'' dnl Delete empty lines and comments + -e '\''s/^\(EXPORTS\|LIBRARY\)\([[ ]].*\)*$/DEF/p'\'' dnl + -e q dnl Only consider the first "real" line + $1`" dnl +])# _LT_DLL_DEF_P + + +# LT_LIB_M +# -------- +# check for math library +AC_DEFUN([LT_LIB_M], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*-ncr-sysv4.3*) + AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM=-lmw) + AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm") + ;; +*) + AC_CHECK_LIB(m, cos, LIBM=-lm) + ;; +esac +AC_SUBST([LIBM]) +])# LT_LIB_M + +# Old name: +AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_CHECK_LIBM], []) + + +# _LT_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------- +m4_defun([_LT_COMPILER_NO_RTTI], +[m4_require([_LT_TAG_COMPILER])dnl + +_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test yes = "$GCC"; then + case $cc_basename in + nvcc*) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;; + *) + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;; + esac + + _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1], + [Compiler flag to turn off builtin functions]) +])# _LT_COMPILER_NO_RTTI + + +# _LT_CMD_GLOBAL_SYMBOLS +# ---------------------- +m4_defun([_LT_CMD_GLOBAL_SYMBOLS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_PROG_CC])dnl 
+AC_REQUIRE([AC_PROG_AWK])dnl +AC_REQUIRE([LT_PATH_NM])dnl +AC_REQUIRE([LT_PATH_LD])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_TAG_COMPILER])dnl + +# Check for command to grab the raw symbol name followed by C symbol from nm. +AC_MSG_CHECKING([command to parse $NM output from $compiler object]) +AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], +[ +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[[BCDEGRST]]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[[BCDT]]' + ;; +cygwin* | mingw* | pw32* | cegcc*) + symcode='[[ABCDGISTW]]' + ;; +hpux*) + if test ia64 = "$host_cpu"; then + symcode='[[ABCDEGRST]]' + fi + ;; +irix* | nonstopux*) + symcode='[[BCDEGRST]]' + ;; +osf*) + symcode='[[BCDEGQRST]]' + ;; +solaris*) + symcode='[[BDRT]]' + ;; +sco3.2v5*) + symcode='[[DT]]' + ;; +sysv4.2uw2*) + symcode='[[DT]]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[[ABDT]]' + ;; +sysv4) + symcode='[[DFNSTU]]' + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[[ABCDGIRSTW]]' ;; +esac + +if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Gets list of data symbols to import. + lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'" + # Adjust the below global symbol transforms to fixup imported variables. + lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'" + lt_c_name_hook=" -e 's/^I .* \(.*\)$/ {\"\1\", (void *) 0},/p'" + lt_c_name_lib_hook="\ + -e 's/^I .* \(lib.*\)$/ {\"\1\", (void *) 0},/p'\ + -e 's/^I .* \(.*\)$/ {\"lib\1\", (void *) 0},/p'" +else + # Disable hooks by default. 
+ lt_cv_sys_global_symbol_to_import= + lt_cdecl_hook= + lt_c_name_hook= + lt_c_name_lib_hook= +fi + +# Transform an extracted symbol line into a proper C declaration. +# Some systems (esp. on ia64) link data and code symbols differently, +# so use this general approach. +lt_cv_sys_global_symbol_to_cdecl="$SED -n"\ +$lt_cdecl_hook\ +" -e 's/^T .* \(.*\)$/extern int \1();/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\ +$lt_c_name_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/p'" + +# Transform an extracted symbol line into symbol name with lib prefix and +# symbol address. +lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\ +$lt_c_name_lib_hook\ +" -e 's/^: \(.*\) .*$/ {\"\1\", (void *) 0},/p'"\ +" -e 's/^$symcode$symcode* .* \(lib.*\)$/ {\"\1\", (void *) \&\1},/p'"\ +" -e 's/^$symcode$symcode* .* \(.*\)$/ {\"lib\1\", (void *) \&\1},/p'" + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# Try without a prefix underscore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + if test "$lt_cv_nm_interface" = "MS dumpbin"; then + # Fake it for dumpbin and say T for any non-static function, + # D for any global variable and I for any imported variable. + # Also find C++ and __fastcall symbols from MSVC++ or ICC, + # which start with @ or ?. 
+ lt_cv_sys_global_symbol_pipe="$AWK ['"\ +" {last_section=section; section=\$ 3};"\ +" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ +" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ +" /^ *Symbol name *: /{split(\$ 0,sn,\":\"); si=substr(sn[2],2)};"\ +" /^ *Type *: code/{print \"T\",si,substr(si,length(prfx))};"\ +" /^ *Type *: data/{print \"I\",si,substr(si,length(prfx))};"\ +" \$ 0!~/External *\|/{next};"\ +" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ +" {if(hide[section]) next};"\ +" {f=\"D\"}; \$ 0~/\(\).*\|/{f=\"T\"};"\ +" {split(\$ 0,a,/\||\r/); split(a[2],s)};"\ +" s[1]~/^[@?]/{print f,s[1],s[1]; next};"\ +" s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\ +" ' prfx=^$ac_symprfx]" + else + lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + fi + lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <<_LT_EOF +#ifdef __cplusplus +extern "C" { +#endif +char nm_test_var; +void nm_test_func(void); +void nm_test_func(void){} +#ifdef __cplusplus +} +#endif +int main(){nm_test_var='a';nm_test_func();return(0);} +_LT_EOF + + if AC_TRY_EVAL(ac_compile); then + # Now try to grab the symbols. + nlist=conftest.nm + $ECHO "$as_me:$LINENO: $NM conftest.$ac_objext | $lt_cv_sys_global_symbol_pipe > $nlist" >&AS_MESSAGE_LOG_FD + if eval "$NM" conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist 2>&AS_MESSAGE_LOG_FD && test -s "$nlist"; then + # Try sorting and uniquifying the output. + if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. 
+ if $GREP ' nm_test_var$' "$nlist" >/dev/null; then + if $GREP ' nm_test_func$' "$nlist" >/dev/null; then + cat <<_LT_EOF > conftest.$ac_ext +/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ +#if defined _WIN32 || defined __CYGWIN__ || defined _WIN32_WCE +/* DATA imports from DLLs on WIN32 can't be const, because runtime + relocations are performed -- see ld's documentation on pseudo-relocs. */ +# define LT@&t@_DLSYM_CONST +#elif defined __osf__ +/* This system does not cope well with relocations in const data. */ +# define LT@&t@_DLSYM_CONST +#else +# define LT@&t@_DLSYM_CONST const +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +_LT_EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' + + cat <<_LT_EOF >> conftest.$ac_ext + +/* The mapping between symbol names and symbols. */ +LT@&t@_DLSYM_CONST struct { + const char *name; + void *address; +} +lt__PROGRAM__LTX_preloaded_symbols[[]] = +{ + { "@PROGRAM@", (void *) 0 }, +_LT_EOF + $SED "s/^$symcode$symcode* .* \(.*\)$/ {\"\1\", (void *) \&\1},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext + cat <<\_LT_EOF >> conftest.$ac_ext + {0, (void *) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt__PROGRAM__LTX_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif +_LT_EOF + # Now try linking the two files. 
+ mv conftest.$ac_objext conftstm.$ac_objext + lt_globsym_save_LIBS=$LIBS + lt_globsym_save_CFLAGS=$CFLAGS + LIBS=conftstm.$ac_objext + CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest$ac_exeext; then + pipe_works=yes + fi + LIBS=$lt_globsym_save_LIBS + CFLAGS=$lt_globsym_save_CFLAGS + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&5 + fi + rm -rf conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test yes = "$pipe_works"; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi + +# Response file support. 
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then + nm_file_list_spec='@' +elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then + nm_file_list_spec='@' +fi + +_LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], + [Take the output of nm and produce a listing of raw symbols and C names]) +_LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], + [Transform the output of nm in a proper C declaration]) +_LT_DECL([global_symbol_to_import], [lt_cv_sys_global_symbol_to_import], [1], + [Transform the output of nm into a list of symbols to manually relocate]) +_LT_DECL([global_symbol_to_c_name_address], + [lt_cv_sys_global_symbol_to_c_name_address], [1], + [Transform the output of nm in a C name address pair]) +_LT_DECL([global_symbol_to_c_name_address_lib_prefix], + [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], + [Transform the output of nm in a C name address pair when lib prefix is needed]) +_LT_DECL([nm_interface], [lt_cv_nm_interface], [1], + [The name lister interface]) +_LT_DECL([], [nm_file_list_spec], [1], + [Specify filename containing input files for $NM]) +]) # _LT_CMD_GLOBAL_SYMBOLS + + +# _LT_COMPILER_PIC([TAGNAME]) +# --------------------------- +m4_defun([_LT_COMPILER_PIC], +[m4_require([_LT_TAG_COMPILER])dnl +_LT_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_TAGVAR(lt_prog_compiler_static, $1)= + +m4_if([$1], [CXX], [ + # C++ specific cases for pic, static, wl, etc. + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. 
+ if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. 
On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix[[4-9]]*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + mingw* | cygwin* | os2* | pw32* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). 
+ m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + ecpc* ) + # old Intel C++ for x86_64, which still supported -KPIC. + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + icpc* ) + # Intel C++, used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. 
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) + # IBM XL 8.0, 9.0 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd* | netbsdelf*-gnu) + ;; + *qnx* | *nto*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test yes = "$GCC"; then + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + m68k) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the '-m68020' flag to GCC prevents building anything better, + # like '-m68040'. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + esac + ;; + + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + haiku*) + # PIC is the default for Haiku. + # The "-static" flag exists, but is broken. + _LT_TAGVAR(lt_prog_compiler_static, $1)= + ;; + + hpux*) + # PIC is the default for 64-bit PA HP-UX, but not for 32-bit + # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag + # sets the default TLS model and affects inlining. + case $host_cpu in + hppa*64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + interix[[3-9]]*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + + case $cc_basename in + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' + if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" + fi + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test ia64 = "$host_cpu"; then + # AIX 5 now supports IA64 processor + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + case $cc_basename in + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + + mingw* | cygwin* | pw32* | os2* | cegcc*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + m4_if([$1], [GCJ], [], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) + case $host_os in + os2*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-static' + ;; + esac + ;; + + hpux9* | hpux10* | hpux11*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. 
+ case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_TAGVAR(lt_prog_compiler_static, $1)='$wl-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + # old Intel for x86_64, which still supported -KPIC. + ecc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # flang / f18. f95 an alias for gfortran or flang on Debian + flang* | f18* | f95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # icc used to be incompatible with GCC. + # ICC 10 doesn't accept -KPIC any more. + icc* | ifort*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + # Lahey Fortran 8.1. 
+ lf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' + _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' + ;; + nagfor*) + # NAG Fortran compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + xl* | bgxl* | bgf* | mpixl*) + # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + *Sun\ F* | *Sun*Fortran*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + *Sun\ C*) + # Sun C 5.9 + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Intel*\ [[CF]]*Compiler*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' 
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + *Portland\ Group*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + esac + ;; + + newsos6) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *nto* | *qnx*) + # QNX uses GNU C++, but need to define -shared option too, otherwise + # it will coredump. + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' + ;; + + osf3* | osf4* | osf5*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + rdos*) + _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +case $host_os in + # For platforms that do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +AC_CACHE_CHECK([for $compiler option to produce PIC], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], + [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) +_LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then + _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], + [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], + [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +_LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], + [Additional compiler flags for building library objects]) + +_LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], + [How to pass a linker flag through the compiler]) +# +# Check to make sure the static flag actually works. 
+# +wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" +_LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) +_LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], + [Compiler flag to prevent dynamic linking]) +])# _LT_COMPILER_PIC + + +# _LT_LINKER_SHLIBS([TAGNAME]) +# ---------------------------- +# See if the linker supports building shared libraries. +m4_defun([_LT_LINKER_SHLIBS], +[AC_REQUIRE([LT_PATH_LD])dnl +AC_REQUIRE([LT_PATH_NM])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_DECL_SED])dnl +m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl +m4_require([_LT_TAG_COMPILER])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +m4_if([$1], [CXX], [ + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + case $host_os in + aix[[4-9]]*) + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. 
+ if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_TAGVAR(export_symbols_cmds, $1)=$ltdll_cmds + ;; + cygwin* | mingw* | cegcc*) + case $cc_basename in + cl* | icl*) + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + ;; + esac + ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; + *) + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +], [ + runpath_var= + _LT_TAGVAR(allow_undefined_flag, $1)= + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(archive_cmds, $1)= + _LT_TAGVAR(archive_expsym_cmds, $1)= + _LT_TAGVAR(compiler_needs_object, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + 
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(hardcode_automatic, $1)=no + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(hardcode_libdir_separator, $1)= + _LT_TAGVAR(hardcode_minus_L, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_TAGVAR(inherit_rpath, $1)=no + _LT_TAGVAR(link_all_deplibs, $1)=unknown + _LT_TAGVAR(module_cmds, $1)= + _LT_TAGVAR(module_expsym_cmds, $1)= + _LT_TAGVAR(old_archive_from_new_cmds, $1)= + _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_TAGVAR(thread_safe_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ' (' and ')$', so one must not match beginning or + # end of line. Example: 'a|bc|.*d.*' will exclude the symbols 'a' and 'bc', + # as well as any symbol that contains 'd'. + _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. + extract_expsyms_cmds= + + case $host_os in + cygwin* | mingw* | pw32* | cegcc*) + # FIXME: the MSVC++ and ICC port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. 
+ if test yes != "$GCC"; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++ or ICC) + with_gnu_ld=yes + ;; + openbsd* | bitrig*) + with_gnu_ld=no + ;; + linux* | k*bsd*-gnu | gnu*) + _LT_TAGVAR(link_all_deplibs, $1)=no + ;; + esac + + _LT_TAGVAR(ld_shlibs, $1)=yes + + # On some targets, GNU ld is compatible enough with the native linker + # that we're better off using the native interface for both. + lt_use_gnu_ld_interface=no + if test yes = "$with_gnu_ld"; then + case $host_os in + aix*) + # The AIX port of GNU ld has always aspired to compatibility + # with the native linker. However, as the warning in the GNU ld + # block says, versions before 2.19.5* couldn't really create working + # shared libraries, regardless of the interface used. + case `$LD -v 2>&1` in + *\ \(GNU\ Binutils\)\ 2.19.5*) ;; + *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; + *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + ;; + *) + lt_use_gnu_ld_interface=yes + ;; + esac + fi + + if test yes = "$lt_use_gnu_ld_interface"; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='$wl' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. 
+ if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in + *GNU\ gold*) supports_anon_versioning=yes ;; + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix[[3-9]]*) + # On AIX/PPC, the GNU linker is very broken + if test ia64 != "$host_cpu"; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: the GNU linker, at least up to release 2.19, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to install binutils +*** 2.20 or above, or modify your PATH so that a non-GNU linker is found. +*** You will then need to restart the configuration process. 
+ +_LT_EOF + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' + _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... 
+ _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags 
$output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + + gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) + tmp_diet=no + if test linux-dietlibc = "$host_os"; then + case $cc_basename in + diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) + esac + fi + if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ + && test no = "$tmp_diet" + then + tmp_addflag=' $pic_flag' + tmp_sharedflag='-shared' + case $cc_basename,$host_cpu in + pgcc*) # Portland Group C compiler + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" 
&& new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag' + ;; + pgf77* | pgf90* | pgf95* | pgfortran*) + # Portland Group f77 and f90 compilers + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + tmp_addflag=' $pic_flag -Mnomain' ;; + ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 + tmp_addflag=' -i_dynamic' ;; + efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 + tmp_addflag=' -i_dynamic -nofor_main' ;; + ifc* | ifort*) # Intel Fortran compiler + tmp_addflag=' -nofor_main' ;; + lf95*) # Lahey Fortran 8.1 + _LT_TAGVAR(whole_archive_flag_spec, $1)= + tmp_sharedflag='--shared' ;; + nagfor*) # NAGFOR 5.3 + tmp_sharedflag='-Wl,-shared' ;; + xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) + tmp_sharedflag='-qmkshrobj' + tmp_addflag= ;; + nvcc*) # Cuda Compiler Driver 2.2 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + ;; + esac + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) # Sun C 5.9 + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + tmp_sharedflag='-G' ;; + *Sun\ F*) # Sun Fortran 8.3 + tmp_sharedflag='-G' ;; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + + if test yes = "$supports_anon_versioning"; then + 
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + + case $cc_basename in + tcc*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-rdynamic' + ;; + xlf* | bgf* | bgxlf* | mpixlf*) + # IBM XL Fortran 10.1 on PPC cannot create shared libs itself + _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' + fi + ;; + esac + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' + wlarc= + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + fi + ;; + + solaris*) + if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: The releases 2.8.* of 
the GNU linker cannot reliably +*** create shared libraries on Solaris systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.9.1 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. 
+ if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test no = "$_LT_TAGVAR(ld_shlibs, $1)"; then + runpath_var= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + if test yes = "$GCC" && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + # If we're using GNU nm, then we don't want the "-C" option. + # -C means demangle to GNU nm, but means don't demangle to AIX nm. + # Without the "-l" option, or with the "-B" option, AIX nm treats + # weak defined symbols like other global defined symbols, whereas + # GNU nm marks them as "W". + # While the 'weak' keyword is ignored in the Export File, we need + # it in the Import File for the 'aix-soname' feature, so we have + # to replace the "-B" option with "-P" for AIX nm. + if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then + _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { if (\$ 2 == "W") { print \$ 3 " weak" } else { print \$ 3 } } }'\'' | sort -u > $export_symbols' + else + _LT_TAGVAR(export_symbols_cmds, $1)='`func_echo_all $NM | $SED -e '\''s/B\([[^B]]*\)$/P\1/'\''` -PCpgl $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L") || (\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) && ([substr](\$ 1,1,1) != ".")) { if ((\$ 2 == "W") || (\$ 2 == "V") || (\$ 2 == "Z")) { print \$ 1 " weak" } else { print \$ 1 } } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. 
+ # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + if (test x-brtl = "x$ld_flag" || test x-Wl,-brtl = "x$ld_flag"); then + aix_use_runtimelinking=yes + break + fi + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. + aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # traditional, no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. 
+ _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GCC"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag="$shared_flag "'$wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. 
+ _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared libraries. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + amigaos*) + case $host_cpu in + powerpc) + # see comment about AmigaOS4 .so support + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='' + ;; + m68k) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + ;; + + bsdi[[45]]*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32* | cegcc*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++ or Intel C++ Compiler. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. 
+ case $cc_basename in + cl* | icl*) + # Native MSVC or ICC + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. 
+ # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' + _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # Assume MSVC and ICC wrapper + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. + _LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. 
+ _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + esac + ;; + + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + dgux*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2.*) + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
+ freebsd* | dragonfly* | midnightbsd*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + + hpux10*) + if test yes,no = "$GCC,$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test yes,no = "$GCC,$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + m4_if($1, [], [ + # Older versions of the 11.00 compiler do not understand -b yet + # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) + _LT_LINKER_OPTION([if $CC understands -b], + _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], + [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], + [_LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) + ;; + esac + fi + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + # Try to use the -exported_symbol ld option, if it does not + # work, assume that -exports_file does not work either and + # implicitly export all symbols. + # This should be the same for all languages, so no per-tag cache variable. + AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], + [lt_cv_irix_exported_symbol], + [save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -shared $wl-exported_symbol ${wl}foo $wl-update_registry $wl/dev/null" + AC_LINK_IFELSE( + [AC_LANG_SOURCE( + [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], + [C++], [[int foo (void) { return 0; }]], + [Fortran 77], [[ + subroutine foo + end]], + [Fortran], [[ + subroutine foo + end]])])], + [lt_cv_irix_exported_symbol=yes], + [lt_cv_irix_exported_symbol=no]) + LDFLAGS=$save_LDFLAGS]) + if test yes = "$lt_cv_irix_exported_symbol"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations $wl-exports_file $wl$export_symbols -o $lib' + fi + _LT_TAGVAR(link_all_deplibs, $1)=no + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -exports_file $export_symbols -o $lib' + 
fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + linux*) + case $cc_basename in + tcc*) + # Fabrice Bellard et al's Tiny C Compiler + _LT_TAGVAR(ld_shlibs, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + esac + ;; + + netbsd* | netbsdelf*-gnu) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *nto* | *qnx*) + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags $wl-retain-symbols-file,$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + fi + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + osf3*) + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags $wl-soname 
$wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test yes = "$GCC"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $pic_flag $libobjs $deplibs $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + else + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $wl-input $wl$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~$RM $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)='no' + 
_LT_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + solaris*) + _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' + if test yes = "$GCC"; then + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $wl-z ${wl}text $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag $wl-z ${wl}text $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + else + case `$CC -V 2>&1` in + *"Compilers 5.0"*) + wlarc='' + _LT_TAGVAR(archive_cmds, $1)='$LD -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $LD -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' + ;; + *) + wlarc='$wl' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h $soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' + ;; + esac + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. GCC discards it without '$wl', + # but is careful enough not to reorder. + # Supported since Solaris 2.6 (maybe 2.5.1?) 
+ if test yes = "$GCC"; then + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + fi + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + sunos4*) + if test sequent = "$host_vendor"; then + # Use $CC to link under sequent, because it throws in some extra .o + # files that make .init and .fini sections work. + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h $soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' + fi + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4) + case $host_vendor in + sni) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? + ;; + siemens) + ## LD is ld it makes a PLAMLIB + ## CC just makes a GrossModule. 
+ _LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. 
If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + if test yes = "$GCC"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + if test sni = "$host_vendor"; then + case $host in + sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Blargedynsym' + ;; + esac + fi + fi +]) +AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) +test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + +_LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld + +_LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl +_LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl +_LT_DECL([], [extract_expsyms_cmds], [2], + [The commands to extract the exported symbol list from a shared archive]) + +# +# Do we need to explicitly link libc? 
+# +case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test yes,yes = "$GCC,$enable_shared"; then + case $_LT_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_CACHE_CHECK([whether -lc should be explicitly linked in], + [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), + [$RM conftest* + echo "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) + _LT_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) + then + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no + else + lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $RM conftest* + ]) + _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) + ;; + esac + fi + ;; +esac + +_LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], + [Whether or not to add -lc for building shared libraries]) +_LT_TAGDECL([allow_libtool_libs_with_static_runtimes], + [enable_shared_with_static_runtimes], [0], + [Whether or not to disallow shared libs when runtime libs are static]) +_LT_TAGDECL([], [export_dynamic_flag_spec], [1], + [Compiler flag to allow reflexive dlopens]) +_LT_TAGDECL([], [whole_archive_flag_spec], [1], + 
[Compiler flag to generate shared objects directly from archives]) +_LT_TAGDECL([], [compiler_needs_object], [1], + [Whether the compiler copes with passing no objects directly]) +_LT_TAGDECL([], [old_archive_from_new_cmds], [2], + [Create an old-style archive from a shared archive]) +_LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], + [Create a temporary old-style archive to link instead of a shared archive]) +_LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) +_LT_TAGDECL([], [archive_expsym_cmds], [2]) +_LT_TAGDECL([], [module_cmds], [2], + [Commands used to build a loadable module if different from building + a shared archive.]) +_LT_TAGDECL([], [module_expsym_cmds], [2]) +_LT_TAGDECL([], [with_gnu_ld], [1], + [Whether we are building with GNU ld or not]) +_LT_TAGDECL([], [allow_undefined_flag], [1], + [Flag that allows shared libraries with undefined symbols to be built]) +_LT_TAGDECL([], [no_undefined_flag], [1], + [Flag that enforces no undefined symbols]) +_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], + [Flag to hardcode $libdir into a binary during linking. 
+ This must work even if $libdir does not exist]) +_LT_TAGDECL([], [hardcode_libdir_separator], [1], + [Whether we need a single "-rpath" flag with a separated argument]) +_LT_TAGDECL([], [hardcode_direct], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary]) +_LT_TAGDECL([], [hardcode_direct_absolute], [0], + [Set to "yes" if using DIR/libNAME$shared_ext during linking hardcodes + DIR into the resulting binary and the resulting library dependency is + "absolute", i.e impossible to change by setting $shlibpath_var if the + library is relocated]) +_LT_TAGDECL([], [hardcode_minus_L], [0], + [Set to "yes" if using the -LDIR flag during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_shlibpath_var], [0], + [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR + into the resulting binary]) +_LT_TAGDECL([], [hardcode_automatic], [0], + [Set to "yes" if building a shared library automatically hardcodes DIR + into the library and all subsequent libraries and executables linked + against it]) +_LT_TAGDECL([], [inherit_rpath], [0], + [Set to yes if linker adds runtime paths of dependent libraries + to runtime path list]) +_LT_TAGDECL([], [link_all_deplibs], [0], + [Whether libtool must link a program against all its dependency libraries]) +_LT_TAGDECL([], [always_export_symbols], [0], + [Set to "yes" if exported symbols are required]) +_LT_TAGDECL([], [export_symbols_cmds], [2], + [The commands to list exported symbols]) +_LT_TAGDECL([], [exclude_expsyms], [1], + [Symbols that should not be listed in the preloaded symbols]) +_LT_TAGDECL([], [include_expsyms], [1], + [Symbols that must always be exported]) +_LT_TAGDECL([], [prelink_cmds], [2], + [Commands necessary for linking programs (against libraries) with templates]) +_LT_TAGDECL([], [postlink_cmds], [2], + [Commands necessary for finishing linking programs]) +_LT_TAGDECL([], [file_list_spec], [1], + [Specify 
filename containing input files])
+dnl FIXME: Not yet implemented
+dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
+dnl     [Compiler flag to generate thread safe objects])
+])# _LT_LINKER_SHLIBS
+
+
+# _LT_LANG_C_CONFIG([TAG])
+# ------------------------
+# Ensure that the configuration variables for a C compiler are suitably
+# defined.  These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to 'libtool'.
+#
+# The TAG argument is handed on unchanged to the helper macros below that
+# take an argument.  CC is saved on entry and restored on exit, and the
+# whole body runs between AC_LANG_PUSH(C) and AC_LANG_POP.  The feature
+# checks, the shared/static decision and the final _LT_CONFIG call are all
+# skipped when the shell variable 'compiler' is empty.
+m4_defun([_LT_LANG_C_CONFIG],
+[m4_require([_LT_DECL_EGREP])dnl
+# Remember the incoming CC; it is put back on the last line of this macro.
+lt_save_CC=$CC
+AC_LANG_PUSH(C)
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+_LT_TAG_COMPILER
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+# NOTE(review): the 'compiler' variable tested here is presumably assigned
+# by _LT_TAG_COMPILER above; that macro is defined elsewhere, so confirm.
+if test -n "$compiler"; then
+  _LT_COMPILER_NO_RTTI($1)
+  _LT_COMPILER_PIC($1)
+  _LT_COMPILER_C_O($1)
+  _LT_COMPILER_FILE_LOCKS($1)
+  _LT_LINKER_SHLIBS($1)
+  _LT_SYS_DYNAMIC_LINKER($1)
+  _LT_LINKER_HARDCODE_LIBPATH($1)
+  LT_SYS_DLOPEN_SELF
+  _LT_CMD_STRIPLIB
+
+  # Report what library types will actually be built
+  AC_MSG_CHECKING([if libtool supports shared libraries])
+  AC_MSG_RESULT([$can_build_shared])
+
+  AC_MSG_CHECKING([whether to build shared libraries])
+  # A request for shared libraries is dropped when they cannot be built.
+  test no = "$can_build_shared" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    # A shared build switches the static build off.  When RANLIB is set, a
+    # ranlib step on the library is appended to archive_cmds after a '~'
+    # and the same ranlib call is stored as postinstall_cmds.
+    test yes = "$enable_shared" && enable_static=no
+    if test -n "$RANLIB"; then
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[[4-9]]*)
+    # Only non-ia64 hosts are affected.  The first two patterns leave
+    # enable_static untouched; any other shared build turns it off.
+    if test ia64 != "$host_cpu"; then
+      case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in
+      yes,aix,yes) ;;			# shared object as lib.so file only
+      yes,svr4,*) ;;			# shared object as lib.so archive member only
+      yes,*) enable_static=no ;;	# shared object in lib.a archive as well
+      esac
+    fi
+    ;;
+  esac
+  AC_MSG_RESULT([$enable_shared])
+
+  AC_MSG_CHECKING([whether to build static libraries])
+  # Make sure either enable_shared or enable_static is yes.
+  test yes = "$enable_shared" || enable_static=yes
+  AC_MSG_RESULT([$enable_static])
+
+  _LT_CONFIG($1)
+fi
+AC_LANG_POP
+# Restore the CC value saved at the top of this macro.
+CC=$lt_save_CC
+])# _LT_LANG_C_CONFIG
+
+
+# _LT_LANG_CXX_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for a C++ compiler are suitably
+# defined.  These variables are subsequently used by _LT_CONFIG to write
+# the compiler configuration to 'libtool'.
+m4_defun([_LT_LANG_CXX_CONFIG], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +m4_require([_LT_DECL_EGREP])dnl +m4_require([_LT_PATH_MANIFEST_TOOL])dnl +if test -n "$CXX" && ( test no != "$CXX" && + ( (test g++ = "$CXX" && `g++ -v >/dev/null 2>&1` ) || + (test g++ != "$CXX"))); then + AC_PROG_CXXCPP +else + _lt_caught_CXX_error=yes +fi + +AC_LANG_PUSH(C++) +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(compiler_needs_object, $1)=no +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for C++ test sources. +ac_ext=cpp + +# Object file extension for compiled C++ test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the CXX compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. 
+if test yes != "$_lt_caught_CXX_error"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="int some_variable = 0;" + + # Code to be used in simple link tests + lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Allow CC to be a program name with arguments. + lt_save_CC=$CC + lt_save_CFLAGS=$CFLAGS + lt_save_LD=$LD + lt_save_GCC=$GCC + GCC=$GXX + lt_save_with_gnu_ld=$with_gnu_ld + lt_save_path_LD=$lt_cv_path_LD + if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then + lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx + else + $as_unset lt_cv_prog_gnu_ld + fi + if test -n "${lt_cv_path_LDCXX+set}"; then + lt_cv_path_LD=$lt_cv_path_LDCXX + else + $as_unset lt_cv_path_LD + fi + test -z "${LDCXX+set}" || LD=$LDCXX + CC=${CXX-"c++"} + CFLAGS=$CXXFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + # We don't want -fno-exception when compiling C++ code, so set the + # no_builtin_flag separately + if test yes = "$GXX"; then + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + else + _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + fi + + if test yes = "$GXX"; then + # Set up default GNU C++ configuration + + LT_PATH_LD + + # Check if GNU C++ uses GNU ld as the underlying linker, since the + # archiving commands below assume that GNU ld is being used. 
+ if test yes = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # If archive_cmds runs LD, not CC, wlarc should be empty + # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to + # investigate it a little bit more. (MM) + wlarc='$wl' + + # ancient GNU ld didn't support --whole-archive et. al. + if eval "`$CC -print-prog-name=ld` --help 2>&1" | + $GREP 'no-whole-archive' > /dev/null; then + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + else + _LT_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + with_gnu_ld=no + wlarc= + + # A generic and very simple default shared library creation + # command for GNU C++ for the case where it uses the native + # linker, instead of GNU ld. If possible, this setting should + # overridden to take advantage of the native linker features on + # the platform it is being used on. + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + fi + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + + else + GXX=no + with_gnu_ld=no + wlarc= + fi + + # PORTME: fill in a description of your system's C++ link characteristics + AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) + _LT_TAGVAR(ld_shlibs, $1)=yes + case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aix[[4-9]]*) + if test ia64 = "$host_cpu"; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag= + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # have runtime linking enabled, and use it for executables. + # For shared libraries, we enable/disable runtime linking + # depending on the kind of the shared library created - + # when "with_aix_soname,aix_use_runtimelinking" is: + # "aix,no" lib.a(lib.so.V) shared, rtl:no, for executables + # "aix,yes" lib.so shared, rtl:yes, for executables + # lib.a static archive + # "both,no" lib.so.V(shr.o) shared, rtl:yes + # lib.a(lib.so.V) shared, rtl:no, for executables + # "both,yes" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a(lib.so.V) shared, rtl:no + # "svr4,*" lib.so.V(shr.o) shared, rtl:yes, for executables + # lib.a static archive + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + if test svr4,no = "$with_aix_soname,$aix_use_runtimelinking"; then + # With aix-soname=svr4, we create the lib.so.V shared archives only, + # so we don't have lib.a shared libs to link our executables. + # We have to force runtime linking in this case. 
+ aix_use_runtimelinking=yes + LDFLAGS="$LDFLAGS -Wl,-brtl" + fi + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_TAGVAR(archive_cmds, $1)='' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='$wl-f,' + case $with_aix_soname,$aix_use_runtimelinking in + aix,*) ;; # no import file + svr4,* | *,yes) # use import file + # The Import File defines what to hardcode. + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=no + ;; + esac + + if test yes = "$GXX"; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`$CC -print-prog-name=collect2` + if test -f "$collect2name" && + strings "$collect2name" | $GREP resolve_lib_name >/dev/null + then + # We have reworked collect2 + : + else + # We have old collect2 + _LT_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. 
Setting hardcode_minus_L + # to unsupported forces relinking + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)= + fi + esac + shared_flag='-shared' + if test yes = "$aix_use_runtimelinking"; then + shared_flag=$shared_flag' $wl-G' + fi + # Need to ensure runtime linking is disabled for the traditional + # shared library, or the linker may eventually find shared libraries + # /with/ Import File - we do not want to mix them. + shared_flag_aix='-shared' + shared_flag_svr4='-shared $wl-G' + else + # not using gcc + if test ia64 = "$host_cpu"; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test yes = "$aix_use_runtimelinking"; then + shared_flag='$wl-G' + else + shared_flag='$wl-bM:SRE' + fi + shared_flag_aix='$wl-bM:SRE' + shared_flag_svr4='$wl-G' + fi + fi + + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-bexpall' + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to + # export. + _LT_TAGVAR(always_export_symbols, $1)=yes + if test aix,yes = "$with_aix_soname,$aix_use_runtimelinking"; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + # The "-G" linker flag allows undefined symbols. + _LT_TAGVAR(no_undefined_flag, $1)='-bernotok' + # Determine the default libpath from the value encoded in an empty + # executable. 
+ _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs $wl'$no_entry_flag' $compiler_flags `if test -n "$allow_undefined_flag"; then func_echo_all "$wl$allow_undefined_flag"; else :; fi` $wl'$exp_sym_flag:\$export_symbols' '$shared_flag + else + if test ia64 = "$host_cpu"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $libdir:/usr/lib:/lib' + _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\$wl$no_entry_flag"' $compiler_flags $wl$allow_undefined_flag '"\$wl$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an + # empty executable. + _LT_SYS_MODULE_PATH_AIX([$1]) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-bernotok' + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-berok' + if test yes = "$with_gnu_ld"; then + # We only use this code for GNU lds that support --whole-archive. 
+ _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + else + # Exported symbols can be pulled into shared objects from archives + _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + fi + _LT_TAGVAR(archive_cmds_need_lc, $1)=yes + _LT_TAGVAR(archive_expsym_cmds, $1)='$RM -r $output_objdir/$realname.d~$MKDIR $output_objdir/$realname.d' + # -brtl affects multiple linker settings, -berok does not and is overridden later + compiler_flags_filtered='`func_echo_all "$compiler_flags " | $SED -e "s%-brtl\\([[, ]]\\)%-berok\\1%g"`' + if test svr4 != "$with_aix_soname"; then + # This is similar to how AIX traditionally builds its shared + # libraries. Need -bnortl late, we may have -brtl in LDFLAGS. + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_aix' -o $output_objdir/$realname.d/$soname $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$realname.d/$soname' + fi + if test aix != "$with_aix_soname"; then + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$CC '$shared_flag_svr4' -o $output_objdir/$realname.d/$shared_archive_member_spec.o $libobjs $deplibs $wl-bnoentry '$compiler_flags_filtered'$wl-bE:$export_symbols$allow_undefined_flag~$STRIP -e $output_objdir/$realname.d/$shared_archive_member_spec.o~( func_echo_all "#! 
$soname($shared_archive_member_spec.o)"; if test shr_64 = "$shared_archive_member_spec"; then func_echo_all "# 64"; else func_echo_all "# 32"; fi; cat $export_symbols ) > $output_objdir/$realname.d/$shared_archive_member_spec.imp~$AR $AR_FLAGS $output_objdir/$soname $output_objdir/$realname.d/$shared_archive_member_spec.o $output_objdir/$realname.d/$shared_archive_member_spec.imp' + else + # used by -dlpreopen to get the symbols + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$MV $output_objdir/$realname.d/$soname $output_objdir' + fi + _LT_TAGVAR(archive_expsym_cmds, $1)="$_LT_TAGVAR(archive_expsym_cmds, $1)"'~$RM -r $output_objdir/$realname.d' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32* | cegcc*) + case $GXX,$cc_basename in + ,cl* | no,cl* | ,icl* | no,icl*) + # Native MSVC or ICC + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=.dll + # FIXME: Setting linknames here is a bad hack. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~linknames=' + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp "$export_symbols" "$output_objdir/$soname.def"; + echo "$tool_output_objdir$soname.def" > "$output_objdir/$soname.exp"; + else + $SED -e '\''s/^/-link -EXPORT:/'\'' < $export_symbols > $output_objdir/$soname.exp; + fi~ + $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ + linknames=' + # The linker will not automatically build a static lib if we build a DLL. + # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + # Don't use ranlib + _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' + _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ + lt_tool_outputfile="@TOOL_OUTPUT@"~ + case $lt_outputfile in + *.exe|*.EXE) ;; + *) + lt_outputfile=$lt_outputfile.exe + lt_tool_outputfile=$lt_tool_outputfile.exe + ;; + esac~ + func_to_tool_file "$lt_outputfile"~ + if test : != "$MANIFEST_TOOL" && test -f "$lt_outputfile.manifest"; then + $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; + $RM "$lt_outputfile.manifest"; + fi' + ;; + *) + # g++ + # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-all-symbols' + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_TAGVAR(always_export_symbols, $1)=no + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file, use it as + # is; otherwise, prepend EXPORTS... + _LT_TAGVAR(archive_expsym_cmds, $1)='if _LT_DLL_DEF_P([$export_symbols]); then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname $wl--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + darwin* | rhapsody*) + _LT_DARWIN_LINKER_FEATURES($1) + ;; + + os2*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_TAGVAR(hardcode_minus_L, $1)=yes + _LT_TAGVAR(allow_undefined_flag, $1)=unsupported + shrext_cmds=.dll + _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY ${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + emxexp $libobjs | $SED /"_DLL_InitTerm"/d >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(archive_expsym_cmds, $1)='$ECHO "LIBRARY 
${soname%$shared_ext} INITINSTANCE TERMINSTANCE" > $output_objdir/$libname.def~ + $ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~ + $ECHO "DATA MULTIPLE NONSHARED" >> $output_objdir/$libname.def~ + $ECHO EXPORTS >> $output_objdir/$libname.def~ + prefix_cmds="$SED"~ + if test EXPORTS = "`$SED 1q $export_symbols`"; then + prefix_cmds="$prefix_cmds -e 1d"; + fi~ + prefix_cmds="$prefix_cmds -e \"s/^\(.*\)$/_\1/g\""~ + cat $export_symbols | $prefix_cmds >> $output_objdir/$libname.def~ + $CC -Zdll -Zcrtdll -o $output_objdir/$soname $libobjs $deplibs $compiler_flags $output_objdir/$libname.def~ + emximp -o $lib $output_objdir/$libname.def' + _LT_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/${libname}_dll.a $output_objdir/$libname.def' + _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + _LT_TAGVAR(file_list_spec, $1)='@' + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + freebsd2.*) + # C++ shared libraries reported to be fairly broken before + # switch to ELF + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + freebsd-elf*) + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + + freebsd* | dragonfly* | midnightbsd*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + haiku*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + + hpux9*) + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the 
search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag $wl+b $wl$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test "x$output_objdir/$soname" = "x$lib" || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + hpux10*|hpux11*) + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl+b $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes 
+ _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -b $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP " \-L"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + case $host_cpu in + hppa*64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC $wl+h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag $wl+h $wl$soname $wl+b $wl$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + interix[[3-9]]*) + _LT_TAGVAR(hardcode_direct, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test yes = "$GXX"; then + if test no = "$with_gnu_ld"; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + else + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` -o $lib' + fi + fi + _LT_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_TAGVAR(inherit_rpath, $1)=yes + ;; + + linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib $wl-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc* | ecpc* ) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive$convenience $wl--no-whole-archive' + ;; + pgCC* | pgcpp*) + # Portland Group C++ compiler + case `$CC -V` in + *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) + _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ + compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' + _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ + $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ + $RANLIB $oldlib' + _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname 
$wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='tpldir=Template.dir~ + rm -rf $tpldir~ + $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ + $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 6 and above use weak symbols + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname $wl-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl--rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname -o $lib $wl-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' + ;; + xl* | mpixl* | bgxl*) + # IBM XL 8.0 on PPC, with GNU ld + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl--export-dynamic' + _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib' + if test yes = "$supports_anon_versioning"; then + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ + cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ + echo "local: *; };" >> $output_objdir/$libname.ver~ + $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib' + fi + ;; + *) + case `$CC -V 2>&1 | $SED 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file $wl$export_symbols' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` $wl--no-whole-archive' + _LT_TAGVAR(compiler_needs_object, $1)=yes + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; + esac + ;; + + lynxos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + m88k*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + + *nto* | *qnx*) + _LT_TAGVAR(ld_shlibs, $1)=yes + ;; + + openbsd* | bitrig*) + if test -f /usr/libexec/ld.so; then + _LT_TAGVAR(hardcode_direct, $1)=yes + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_direct_absolute, $1)=yes + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`"; 
then + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-retain-symbols-file,$export_symbols -o $lib' + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-E' + _LT_TAGVAR(whole_archive_flag_spec, $1)=$wlarc'--whole-archive$convenience '$wlarc'--no-whole-archive' + fi + output_verbose_link_cmd=func_echo_all + else + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\$tempext\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ case $host in + osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; + *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; + esac + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + case $host in + osf3*) + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $soname `test -n "$verstring" && func_echo_all "$wl-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + ;; + *) + _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_TAGVAR(archive_cmds, $1)='$CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname $wl-input $wl$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry $output_objdir/so_locations -o $lib~ + $RM $lib.exp' + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list= ; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' + ;; + *) + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(allow_undefined_flag, $1)=' $wl-expect_unresolved $wl\*' + case $host in + osf3*) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-msym $wl-soname $wl$soname `test -n "$verstring" && func_echo_all "$wl-set_version $wl$verstring"` $wl-update_registry $wl$output_objdir/so_locations -o $lib' + ;; + esac + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-rpath $wl$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + + else + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + psos*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + lcc*) + # Lucid + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + solaris*) + case $cc_basename in + CC* | sunCC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_TAGVAR(archive_cmds_need_lc,$1)=yes + _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_TAGVAR(archive_cmds, $1)='$CC -G$allow_undefined_flag -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G$allow_undefined_flag $wl-M $wl$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + # The compiler driver will combine and reorder linker options, + # but understands '-z linker_flag'. + # Supported since Solaris 2.6 (maybe 2.5.1?) + _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' + ;; + esac + _LT_TAGVAR(link_all_deplibs, $1)=yes + + output_verbose_link_cmd='func_echo_all' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
+ _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + + # The C++ compiler must be used to create the archive. + _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test yes,no = "$GXX,$with_gnu_ld"; then + _LT_TAGVAR(no_undefined_flag, $1)=' $wl-z ${wl}defs' + if $CC --version | $GREP -v '^2\.7' > /dev/null; then + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -shared $pic_flag -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + else + # g++ 2.7 appears to require '-G' NOT '-shared' on this + # platform. 
+ _LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags $wl-h $wl$soname -o $lib' + _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib $wl-M $wl$lib.exp $wl-h $wl$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP " \-L"' + fi + + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl-z ${wl}allextract$convenience $wl-z ${wl}defaultextract' + ;; + esac + fi + ;; + esac + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We CANNOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just 
about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + _LT_TAGVAR(no_undefined_flag, $1)='$wl-z,text' + _LT_TAGVAR(allow_undefined_flag, $1)='$wl-z,nodefs' + _LT_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='$wl-R,$libdir' + _LT_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_TAGVAR(link_all_deplibs, $1)=yes + _LT_TAGVAR(export_dynamic_flag_spec, $1)='$wl-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_TAGVAR(archive_cmds, $1)='$CC -G $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ + '"$_LT_TAGVAR(old_archive_cmds, $1)" + _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ + '"$_LT_TAGVAR(reload_cmds, $1)" + ;; + *) + _LT_TAGVAR(archive_cmds, $1)='$CC -shared $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $wl-Bexport:$export_symbols $wl-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + vxworks*) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + + *) + # FIXME: insert proper C++ library support + _LT_TAGVAR(ld_shlibs, $1)=no + ;; + esac + + AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) + test no = "$_LT_TAGVAR(ld_shlibs, $1)" && can_build_shared=no + + _LT_TAGVAR(GCC, $1)=$GXX + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the 
following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS + LDCXX=$LD + LD=$lt_save_LD + GCC=$lt_save_GCC + with_gnu_ld=$lt_save_with_gnu_ld + lt_cv_path_LDCXX=$lt_cv_path_LD + lt_cv_path_LD=$lt_save_path_LD + lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld + lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +fi # test yes != "$_lt_caught_CXX_error" + +AC_LANG_POP +])# _LT_LANG_CXX_CONFIG + + +# _LT_FUNC_STRIPNAME_CNF +# ---------------------- +# func_stripname_cnf prefix suffix name +# Strip PREFIX and SUFFIX off of NAME; the result is left in func_stripname_result. +# PREFIX and SUFFIX must not contain globbing or regex special +# characters, hashes, percent signs, but SUFFIX may contain a leading +# dot (in which case that matches only a dot). +# +# This function is identical to the (non-XSI) version of func_stripname, +# except this one can be used by m4 code that may be executed by configure, +# rather than the libtool script. +m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl +AC_REQUIRE([_LT_DECL_SED]) +AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH]) +func_stripname_cnf () +{ + case @S|@2 in + .*) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%\\\\@S|@2\$%%"`;; # SUFFIX has a leading dot: backslash-escape it for sed + *) func_stripname_result=`$ECHO "@S|@3" | $SED "s%^@S|@1%%; s%@S|@2\$%%"`;; # any other SUFFIX goes into the sed pattern as-is + esac +} # func_stripname_cnf +])# _LT_FUNC_STRIPNAME_CNF + + +# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME]) +# --------------------------------- +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. 
+m4_defun([_LT_SYS_HIDDEN_LIBDEPS], +[m4_require([_LT_FILEUTILS_DEFAULTS])dnl +AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl +# Dependencies to place before and after the object being linked: +_LT_TAGVAR(predep_objects, $1)= +_LT_TAGVAR(postdep_objects, $1)= +_LT_TAGVAR(predeps, $1)= +_LT_TAGVAR(postdeps, $1)= +_LT_TAGVAR(compiler_lib_search_path, $1)= + +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF +int a; +void foo (void) { a = 0; } +_LT_EOF +], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF +class Foo +{ +public: + Foo (void) { a = 0; } +private: + int a; +}; +_LT_EOF +], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer*4 a + a=0 + return + end +_LT_EOF +], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF + subroutine foo + implicit none + integer a + a=0 + return + end +_LT_EOF +], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF +public class foo { + private int a; + public void bar (void) { + a = 0; + } +}; +_LT_EOF +], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF +package foo +func foo() { +} +_LT_EOF +]) + +_lt_libdeps_save_CFLAGS=$CFLAGS +case "$CC $CFLAGS " in #( +*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;; +*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;; +*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;; +esac + +dnl Parse the compiler output and extract the necessary +dnl objects, libraries and library flags. +if AC_TRY_EVAL(ac_compile); then + # Parse the compiler output and extract the necessary + # objects, libraries and library flags. + + # Sentinel used to keep track of whether or not we are before + # the conftest object file; prev holds a pending bare -L or -R flag. 
+ pre_test_object_deps_done=no; prev= + + for p in `eval "$output_verbose_link_cmd"`; do + case $prev$p in + + -L* | -R* | -l*) + # Some compilers place space between "-{L,R}" and the path. + # Remove the space: keep the bare flag in prev and join it to the next word. + if test x-L = "x$p" || + test x-R = "x$p"; then + prev=$p + continue + fi + + # Expand the sysroot to ease extracting the directories later. + if test -z "$prev"; then + case $p in + -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;; + -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;; + -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;; + esac + fi + case $p in + =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;; + esac + if test no = "$pre_test_object_deps_done"; then + case $prev in + -L | -R) + # Internal compiler library paths should come after those + # provided by the user. The postdeps already come after the + # user supplied libs so there is no need to process them. + if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_TAGVAR(compiler_lib_search_path, $1)=$prev$p + else + _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} $prev$p" + fi + ;; + # The "-l" case would never come before the object being + # linked, so don't bother handling this case. + esac + else + if test -z "$_LT_TAGVAR(postdeps, $1)"; then + _LT_TAGVAR(postdeps, $1)=$prev$p + else + _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} $prev$p" + fi + fi + prev= + ;; + + *.lto.$objext) ;; # Ignore GCC LTO objects + *.$objext) + # This assumes that the test object file only shows up + # once in the compiler output. 
+ if test "$p" = "conftest.$objext"; then + pre_test_object_deps_done=yes + continue + fi + + if test no = "$pre_test_object_deps_done"; then + if test -z "$_LT_TAGVAR(predep_objects, $1)"; then + _LT_TAGVAR(predep_objects, $1)=$p + else + _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p" + fi + else + if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then + _LT_TAGVAR(postdep_objects, $1)=$p + else + _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p" + fi + fi + ;; + + *) ;; # Ignore the rest. + + esac + done + + # Clean up. + rm -f a.out a.exe +else + echo "libtool.m4: error: problem compiling $1 test program" +fi + +$RM -f conftest.$objext +CFLAGS=$_lt_libdeps_save_CFLAGS + +# PORTME: override above test on systems where it is broken +m4_if([$1], [CXX], +[case $host_os in +interix[[3-9]]*) + # Interix 3.5 installs completely hosed .la files for C++, so rather than + # hack all around it, let's just trust "g++" to DTRT. + _LT_TAGVAR(predep_objects,$1)= + _LT_TAGVAR(postdep_objects,$1)= + _LT_TAGVAR(postdeps,$1)= + ;; +esac +]) + +case " $_LT_TAGVAR(postdeps, $1) " in +*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; +esac + _LT_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then + _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | $SED -e 's! -L! 
!g' -e 's!^ !!'` +fi +_LT_TAGDECL([], [compiler_lib_search_dirs], [1], + [The directories searched by this compiler when creating a shared library]) +_LT_TAGDECL([], [predep_objects], [1], + [Dependencies to place before and after the objects being linked to + create a shared library]) +_LT_TAGDECL([], [postdep_objects], [1]) +_LT_TAGDECL([], [predeps], [1]) +_LT_TAGDECL([], [postdeps], [1]) +_LT_TAGDECL([], [compiler_lib_search_path], [1], + [The library search path used internally by the compiler when linking + a shared library]) +])# _LT_SYS_HIDDEN_LIBDEPS + + +# _LT_LANG_F77_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for a Fortran 77 compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_F77_CONFIG], +[AC_LANG_PUSH(Fortran 77) +if test -z "$F77" || test no = "$F77"; then + _lt_disable_F77=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for f77 test sources. +ac_ext=f + +# Object file extension for compiled f77 test sources. 
+objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the F77 compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_F77"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # Save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Save CC, GCC and CFLAGS, then point them at the F77 compiler; CC may be a program name with arguments. + lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${F77-"f77"} + CFLAGS=$FFLAGS + compiler=$CC + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + GCC=$G77 + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. 
+ case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure at least one of enable_shared and enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$G77 + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC # restore the settings saved before the tests + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_F77" + +AC_LANG_POP +])# _LT_LANG_F77_CONFIG + + +# _LT_LANG_FC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for a Fortran compiler are +# suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. 
+m4_defun([_LT_LANG_FC_CONFIG], +[AC_LANG_PUSH(Fortran) + +if test -z "$FC" || test no = "$FC"; then + _lt_disable_FC=yes +fi + +_LT_TAGVAR(archive_cmds_need_lc, $1)=no +_LT_TAGVAR(allow_undefined_flag, $1)= +_LT_TAGVAR(always_export_symbols, $1)=no +_LT_TAGVAR(archive_expsym_cmds, $1)= +_LT_TAGVAR(export_dynamic_flag_spec, $1)= +_LT_TAGVAR(hardcode_direct, $1)=no +_LT_TAGVAR(hardcode_direct_absolute, $1)=no +_LT_TAGVAR(hardcode_libdir_flag_spec, $1)= +_LT_TAGVAR(hardcode_libdir_separator, $1)= +_LT_TAGVAR(hardcode_minus_L, $1)=no +_LT_TAGVAR(hardcode_automatic, $1)=no +_LT_TAGVAR(inherit_rpath, $1)=no +_LT_TAGVAR(module_cmds, $1)= +_LT_TAGVAR(module_expsym_cmds, $1)= +_LT_TAGVAR(link_all_deplibs, $1)=unknown +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds +_LT_TAGVAR(no_undefined_flag, $1)= +_LT_TAGVAR(whole_archive_flag_spec, $1)= +_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no + +# Source file extension for fc test sources. +ac_ext=${ac_fc_srcext-f} + +# Object file extension for compiled fc test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# No sense in running all these tests if we already determined that +# the FC compiler isn't working. Some variables (like enable_shared) +# are currently assumed to apply to all compilers on this platform, +# and will be corrupted by setting them based on a non-working compiler. +if test yes != "$_lt_disable_FC"; then + # Code to be used in simple compile tests + lt_simple_compile_test_code="\ + subroutine t + return + end +" + + # Code to be used in simple link tests + lt_simple_link_test_code="\ + program t + end +" + + # ltmain only uses $CC for tagged configurations so make sure $CC is set. + _LT_TAG_COMPILER + + # Save warnings/boilerplate of simple test code + _LT_COMPILER_BOILERPLATE + _LT_LINKER_BOILERPLATE + + # Save CC, GCC and CFLAGS, then point them at the Fortran compiler; CC may be a program name with arguments. 
+ lt_save_CC=$CC + lt_save_GCC=$GCC + lt_save_CFLAGS=$CFLAGS + CC=${FC-"f95"} + CFLAGS=$FCFLAGS + compiler=$CC + GCC=$ac_cv_fc_compiler_gnu + + _LT_TAGVAR(compiler, $1)=$CC + _LT_CC_BASENAME([$compiler]) + + if test -n "$compiler"; then + AC_MSG_CHECKING([if libtool supports shared libraries]) + AC_MSG_RESULT([$can_build_shared]) + + AC_MSG_CHECKING([whether to build shared libraries]) + test no = "$can_build_shared" && enable_shared=no + + # On AIX, shared libraries and static libraries use the same namespace, and + # are all built from PIC. + case $host_os in + aix3*) + test yes = "$enable_shared" && enable_static=no + if test -n "$RANLIB"; then + archive_cmds="$archive_cmds~\$RANLIB \$lib" + postinstall_cmds='$RANLIB $lib' + fi + ;; + aix[[4-9]]*) + if test ia64 != "$host_cpu"; then + case $enable_shared,$with_aix_soname,$aix_use_runtimelinking in + yes,aix,yes) ;; # shared object as lib.so file only + yes,svr4,*) ;; # shared object as lib.so archive member only + yes,*) enable_static=no ;; # shared object in lib.a archive as well + esac + fi + ;; + esac + AC_MSG_RESULT([$enable_shared]) + + AC_MSG_CHECKING([whether to build static libraries]) + # Make sure at least one of enable_shared and enable_static is yes. + test yes = "$enable_shared" || enable_static=yes + AC_MSG_RESULT([$enable_static]) + + _LT_TAGVAR(GCC, $1)=$ac_cv_fc_compiler_gnu + _LT_TAGVAR(LD, $1)=$LD + + ## CAVEAT EMPTOR: + ## There is no encapsulation within the following macros, do not change + ## the running order or otherwise move them around unless you know exactly + ## what you are doing... 
+ _LT_SYS_HIDDEN_LIBDEPS($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_SYS_DYNAMIC_LINKER($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) + fi # test -n "$compiler" + + GCC=$lt_save_GCC # restore the settings saved before the tests + CC=$lt_save_CC + CFLAGS=$lt_save_CFLAGS +fi # test yes != "$_lt_disable_FC" + +AC_LANG_POP +])# _LT_LANG_FC_CONFIG + + +# _LT_LANG_GCJ_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Java Compiler (GCJ) +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GCJ_CONFIG], +[AC_REQUIRE([LT_PROG_GCJ])dnl +AC_LANG_SAVE + +# Source file extension for Java test sources. +ac_ext=java + +# Object file extension for compiled Java test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="class foo {}" + +# Code to be used in simple link tests +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GCJ-"gcj"} +CFLAGS=$GCJFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# GCJ did not exist at the time GCC didn't implicitly link libc in. 
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GCJ_CONFIG + + +# _LT_LANG_GO_CONFIG([TAG]) +# -------------------------- +# Ensure that the configuration variables for the GNU Go compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_GO_CONFIG], +[AC_REQUIRE([LT_PROG_GO])dnl +AC_LANG_SAVE + +# Source file extension for Go test sources. +ac_ext=go + +# Object file extension for compiled Go test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code="package main; func main() { }" + +# Code to be used in simple link tests +lt_simple_link_test_code='package main; func main() { }' + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. +lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC=yes +CC=${GOC-"gccgo"} +CFLAGS=$GOFLAGS +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_TAGVAR(LD, $1)=$LD +_LT_CC_BASENAME([$compiler]) + +# Go did not exist at the time GCC didn't implicitly link libc in. 
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no + +_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds +_LT_TAGVAR(reload_flag, $1)=$reload_flag +_LT_TAGVAR(reload_cmds, $1)=$reload_cmds + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +if test -n "$compiler"; then + _LT_COMPILER_NO_RTTI($1) + _LT_COMPILER_PIC($1) + _LT_COMPILER_C_O($1) + _LT_COMPILER_FILE_LOCKS($1) + _LT_LINKER_SHLIBS($1) + _LT_LINKER_HARDCODE_LIBPATH($1) + + _LT_CONFIG($1) +fi + +AC_LANG_RESTORE + +GCC=$lt_save_GCC +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_GO_CONFIG + + +# _LT_LANG_RC_CONFIG([TAG]) +# ------------------------- +# Ensure that the configuration variables for the Windows resource compiler +# are suitably defined. These variables are subsequently used by _LT_CONFIG +# to write the compiler configuration to 'libtool'. +m4_defun([_LT_LANG_RC_CONFIG], +[AC_REQUIRE([LT_PROG_RC])dnl +AC_LANG_SAVE + +# Source file extension for RC test sources. +ac_ext=rc + +# Object file extension for compiled RC test sources. +objext=o +_LT_TAGVAR(objext, $1)=$objext + +# Code to be used in simple compile tests +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' + +# Code to be used in simple link tests +lt_simple_link_test_code=$lt_simple_compile_test_code + +# ltmain only uses $CC for tagged configurations so make sure $CC is set. +_LT_TAG_COMPILER + +# save warnings/boilerplate of simple test code +_LT_COMPILER_BOILERPLATE +_LT_LINKER_BOILERPLATE + +# Allow CC to be a program name with arguments. 
+lt_save_CC=$CC +lt_save_CFLAGS=$CFLAGS +lt_save_GCC=$GCC +GCC= +CC=${RC-"windres"} +CFLAGS= +compiler=$CC +_LT_TAGVAR(compiler, $1)=$CC +_LT_CC_BASENAME([$compiler]) +_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + +if test -n "$compiler"; then + : + _LT_CONFIG($1) +fi + +GCC=$lt_save_GCC +AC_LANG_RESTORE +CC=$lt_save_CC +CFLAGS=$lt_save_CFLAGS +])# _LT_LANG_RC_CONFIG + + +# LT_PROG_GCJ +# ----------- +AC_DEFUN([LT_PROG_GCJ], +[m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], + [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], + [AC_CHECK_TOOL(GCJ, gcj,) + test set = "${GCJFLAGS+set}" || GCJFLAGS="-g -O2" + AC_SUBST(GCJFLAGS)])])[]dnl +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_GCJ], []) + + +# LT_PROG_GO +# ---------- +AC_DEFUN([LT_PROG_GO], +[AC_CHECK_TOOL(GOC, gccgo,) +]) + + +# LT_PROG_RC +# ---------- +AC_DEFUN([LT_PROG_RC], +[AC_CHECK_TOOL(RC, windres,) +]) + +# Old name: +AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_RC], []) + + +# _LT_DECL_EGREP +# -------------- +# If we don't have a new enough Autoconf to choose the best grep +# available, choose the one first in the user's PATH. +m4_defun([_LT_DECL_EGREP], +[AC_REQUIRE([AC_PROG_EGREP])dnl +AC_REQUIRE([AC_PROG_FGREP])dnl +test -z "$GREP" && GREP=grep +_LT_DECL([], [GREP], [1], [A grep program that handles long lines]) +_LT_DECL([], [EGREP], [1], [An ERE matcher]) +_LT_DECL([], [FGREP], [1], [A literal string matcher]) +dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too +AC_SUBST([GREP]) +]) + + +# _LT_DECL_OBJDUMP +# -------------- +# If we don't have a new enough Autoconf to choose the best objdump +# available, choose the one first in the user's PATH. 
+m4_defun([_LT_DECL_OBJDUMP], +[AC_CHECK_TOOL(OBJDUMP, objdump, false) +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) +AC_SUBST([OBJDUMP]) +]) + +# _LT_DECL_DLLTOOL +# ---------------- +# Ensure DLLTOOL variable is set. +m4_defun([_LT_DECL_DLLTOOL], +[AC_CHECK_TOOL(DLLTOOL, dlltool, false) +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program]) +AC_SUBST([DLLTOOL]) +]) + +# _LT_DECL_FILECMD +# ---------------- +# Check for a file(cmd) program that can be used to detect file type and magic +m4_defun([_LT_DECL_FILECMD], +[AC_CHECK_TOOL([FILECMD], [file], [:]) +_LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types]) +])# _LD_DECL_FILECMD + +# _LT_DECL_SED +# ------------ +# Check for a fully-functional sed program, that truncates +# as few characters as possible. Prefer GNU sed if found. +m4_defun([_LT_DECL_SED], +[AC_PROG_SED +test -z "$SED" && SED=sed +Xsed="$SED -e 1s/^X//" +_LT_DECL([], [SED], [1], [A sed program that does not truncate output]) +_LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], + [Sed that helps us avoid accidentally triggering echo(1) options like -n]) +])# _LT_DECL_SED + +m4_ifndef([AC_PROG_SED], [ +############################################################ +# NOTE: This macro has been submitted for inclusion into # +# GNU Autoconf as AC_PROG_SED. When it is available in # +# a released version of Autoconf we should remove this # +# macro and use it instead. # +############################################################ + +m4_defun([AC_PROG_SED], +[AC_MSG_CHECKING([for a sed that does not truncate output]) +AC_CACHE_VAL(lt_cv_path_SED, +[# Loop through the user's path and test for sed and gsed. +# Then use that list of sed's as ones to test for truncation. +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for lt_ac_prog in sed gsed; do + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then + lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" + fi + done + done +done +IFS=$as_save_IFS +lt_ac_max=0 +lt_ac_count=0 +# Add /usr/xpg4/bin/sed as it is typically found on Solaris +# along with /bin/sed that truncates output. +for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do + test ! -f "$lt_ac_sed" && continue + cat /dev/null > conftest.in + lt_ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >conftest.in + # Check for GNU sed and select it if it is found. + if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test 10 -lt "$lt_ac_count" && break + lt_ac_count=`expr $lt_ac_count + 1` + if test "$lt_ac_count" -gt "$lt_ac_max"; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_SUBST([SED]) +AC_MSG_RESULT([$SED]) +])#AC_PROG_SED +])#m4_ifndef + +# Old name: +AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([LT_AC_PROG_SED], []) + + +# _LT_CHECK_SHELL_FEATURES +# ------------------------ +# Find out whether the shell is Bourne or XSI compatible, +# or has some other useful features. 
+m4_defun([_LT_CHECK_SHELL_FEATURES], +[if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + lt_unset=unset +else + lt_unset=false +fi +_LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl + +# test EBCDIC or ASCII +case `echo X|tr X '\101'` in + A) # ASCII based system + # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr + lt_SP2NL='tr \040 \012' + lt_NL2SP='tr \015\012 \040\040' + ;; + *) # EBCDIC based system + lt_SP2NL='tr \100 \n' + lt_NL2SP='tr \r\n \100\100' + ;; +esac +_LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl +_LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl +])# _LT_CHECK_SHELL_FEATURES + + +# _LT_PATH_CONVERSION_FUNCTIONS +# ----------------------------- +# Determine what file name conversion functions should be used by +# func_to_host_file (and, implicitly, by func_to_host_path). These are needed +# for certain cross-compile configurations and native mingw. +m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_MSG_CHECKING([how to convert $build file names to $host format]) +AC_CACHE_VAL(lt_cv_to_host_file_cmd, +[case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 + ;; + esac + ;; + *-*-cygwin* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin + ;; + *-*-cygwin* ) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; + * ) # otherwise, assume *nix + lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin + ;; + esac + ;; + * ) # unhandled hosts (and "normal" native builds) + lt_cv_to_host_file_cmd=func_convert_file_noop + ;; +esac +]) +to_host_file_cmd=$lt_cv_to_host_file_cmd 
+AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) +_LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], + [0], [convert $build file names to $host format])dnl + +AC_MSG_CHECKING([how to convert $build file names to toolchain format]) +AC_CACHE_VAL(lt_cv_to_tool_file_cmd, +[#assume ordinary cross tools, or native build. +lt_cv_to_tool_file_cmd=func_convert_file_noop +case $host in + *-*-mingw* ) + case $build in + *-*-mingw* ) # actually msys + lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 + ;; + esac + ;; +esac +]) +to_tool_file_cmd=$lt_cv_to_tool_file_cmd +AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) +_LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], + [0], [convert $build files to toolchain format])dnl +])# _LT_PATH_CONVERSION_FUNCTIONS diff --git a/3rd/pcre2/m4/ltoptions.m4 b/3rd/pcre2/m4/ltoptions.m4 new file mode 100644 index 00000000..b0b5e9c2 --- /dev/null +++ b/3rd/pcre2/m4/ltoptions.m4 @@ -0,0 +1,437 @@ +# Helper functions for option handling. -*- Autoconf -*- +# +# Copyright (C) 2004-2005, 2007-2009, 2011-2019, 2021-2022 Free +# Software Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 8 ltoptions.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) + + +# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) +# ------------------------------------------ +m4_define([_LT_MANGLE_OPTION], +[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) + + +# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) +# --------------------------------------- +# Set option OPTION-NAME for macro MACRO-NAME, and if there is a +# matching handler defined, dispatch to it. Other OPTION-NAMEs are +# saved as a flag. 
+m4_define([_LT_SET_OPTION], +[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl +m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), + _LT_MANGLE_DEFUN([$1], [$2]), + [m4_warning([Unknown $1 option '$2'])])[]dnl +]) + + +# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) +# ------------------------------------------------------------ +# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. +m4_define([_LT_IF_OPTION], +[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) + + +# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) +# ------------------------------------------------------- +# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME +# are set. +m4_define([_LT_UNLESS_OPTIONS], +[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), + [m4_define([$0_found])])])[]dnl +m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 +])[]dnl +]) + + +# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) +# ---------------------------------------- +# OPTION-LIST is a space-separated list of Libtool options associated +# with MACRO-NAME. If any OPTION has a matching handler declared with +# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about +# the unknown option and exit. +m4_defun([_LT_SET_OPTIONS], +[# Set options +m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), + [_LT_SET_OPTION([$1], _LT_Option)]) + +m4_if([$1],[LT_INIT],[ + dnl + dnl Simply set some default values (i.e off) if boolean options were not + dnl specified: + _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no + ]) + _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no + ]) + dnl + dnl If no reference was made to various pairs of opposing options, then + dnl we run the default mode handler for the pair. 
For example, if neither + dnl 'shared' nor 'disable-shared' was passed, we enable building of shared + dnl archives by default: + _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) + _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) + _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], + [_LT_ENABLE_FAST_INSTALL]) + _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], + [_LT_WITH_AIX_SONAME([aix])]) + ]) +])# _LT_SET_OPTIONS + + +## --------------------------------- ## +## Macros to handle LT_INIT options. ## +## --------------------------------- ## + +# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) +# ----------------------------------------- +m4_define([_LT_MANGLE_DEFUN], +[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) + + +# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) +# ----------------------------------------------- +m4_define([LT_OPTION_DEFINE], +[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl +])# LT_OPTION_DEFINE + + +# dlopen +# ------ +LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes +]) + +AU_DEFUN([AC_LIBTOOL_DLOPEN], +[_LT_SET_OPTION([LT_INIT], [dlopen]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'dlopen' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) + + +# win32-dll +# --------- +# Declare package support for building win32 dll's. 
+LT_OPTION_DEFINE([LT_INIT], [win32-dll], +[enable_win32_dll=yes + +case $host in +*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; +esac + +test -z "$AS" && AS=as +_LT_DECL([], [AS], [1], [Assembler program])dnl + +test -z "$DLLTOOL" && DLLTOOL=dlltool +_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl + +test -z "$OBJDUMP" && OBJDUMP=objdump +_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl +])# win32-dll + +AU_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_REQUIRE([AC_CANONICAL_HOST])dnl +_LT_SET_OPTION([LT_INIT], [win32-dll]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'win32-dll' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) + + +# _LT_ENABLE_SHARED([DEFAULT]) +# ---------------------------- +# implement the --enable-shared flag, and supports the 'shared' and +# 'disable-shared' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_SHARED], +[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], + [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) + + _LT_DECL([build_libtool_libs], [enable_shared], [0], + [Whether or not to build shared libraries]) +])# _LT_ENABLE_SHARED + +LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) +]) + +AC_DEFUN([AC_DISABLE_SHARED], +[_LT_SET_OPTION([LT_INIT], [disable-shared]) +]) + +AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) +AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_SHARED], []) +dnl AC_DEFUN([AM_DISABLE_SHARED], []) + + + +# _LT_ENABLE_STATIC([DEFAULT]) +# ---------------------------- +# implement the --enable-static flag, and support the 'static' and +# 'disable-static' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_STATIC], +[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], + [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_static=]_LT_ENABLE_STATIC_DEFAULT) + + _LT_DECL([build_old_libs], [enable_static], [0], + [Whether or not to build static libraries]) +])# _LT_ENABLE_STATIC + +LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) + +# Old names: +AC_DEFUN([AC_ENABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) +]) + +AC_DEFUN([AC_DISABLE_STATIC], +[_LT_SET_OPTION([LT_INIT], [disable-static]) +]) + +AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) +AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AM_ENABLE_STATIC], []) +dnl AC_DEFUN([AM_DISABLE_STATIC], []) + + + +# _LT_ENABLE_FAST_INSTALL([DEFAULT]) +# ---------------------------------- +# implement the --enable-fast-install flag, and support the 'fast-install' +# and 'disable-fast-install' LT_INIT options. +# DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. +m4_define([_LT_ENABLE_FAST_INSTALL], +[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl +AC_ARG_ENABLE([fast-install], + [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], + [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. 
+ lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for pkg in $enableval; do + IFS=$lt_save_ifs + if test "X$pkg" = "X$p"; then + enable_fast_install=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) + +_LT_DECL([fast_install], [enable_fast_install], [0], + [Whether or not to optimize for fast installation])dnl +])# _LT_ENABLE_FAST_INSTALL + +LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) +LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) + +# Old names: +AU_DEFUN([AC_ENABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'fast-install' option into LT_INIT's first parameter.]) +]) + +AU_DEFUN([AC_DISABLE_FAST_INSTALL], +[_LT_SET_OPTION([LT_INIT], [disable-fast-install]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you put +the 'disable-fast-install' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) +dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) + + +# _LT_WITH_AIX_SONAME([DEFAULT]) +# ---------------------------------- +# implement the --with-aix-soname flag, and support the `aix-soname=aix' +# and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT +# is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'. 
+m4_define([_LT_WITH_AIX_SONAME], +[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl +shared_archive_member_spec= +case $host,$enable_shared in +power*-*-aix[[5-9]]*,yes) + AC_MSG_CHECKING([which variant of shared library versioning to provide]) + AC_ARG_WITH([aix-soname], + [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], + [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], + [case $withval in + aix|svr4|both) + ;; + *) + AC_MSG_ERROR([Unknown argument to --with-aix-soname]) + ;; + esac + lt_cv_with_aix_soname=$with_aix_soname], + [AC_CACHE_VAL([lt_cv_with_aix_soname], + [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) + with_aix_soname=$lt_cv_with_aix_soname]) + AC_MSG_RESULT([$with_aix_soname]) + if test aix != "$with_aix_soname"; then + # For the AIX way of multilib, we name the shared archive member + # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', + # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. + # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, + # the AIX toolchain works better with OBJECT_MODE set (default 32). + if test 64 = "${OBJECT_MODE-32}"; then + shared_archive_member_spec=shr_64 + else + shared_archive_member_spec=shr + fi + fi + ;; +*) + with_aix_soname=aix + ;; +esac + +_LT_DECL([], [shared_archive_member_spec], [0], + [Shared archive member basename, for filename based shared library versioning on AIX])dnl +])# _LT_WITH_AIX_SONAME + +LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) +LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) + + +# _LT_WITH_PIC([MODE]) +# -------------------- +# implement the --with-pic flag, and support the 'pic-only' and 'no-pic' +# LT_INIT options. +# MODE is either 'yes' or 'no'. 
If omitted, it defaults to 'both'. +m4_define([_LT_WITH_PIC], +[AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], + [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [lt_p=${PACKAGE-default} + case $withval in + yes|no) pic_mode=$withval ;; + *) + pic_mode=default + # Look at the argument we got. We use all the common list separators. + lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, + for lt_pkg in $withval; do + IFS=$lt_save_ifs + if test "X$lt_pkg" = "X$lt_p"; then + pic_mode=yes + fi + done + IFS=$lt_save_ifs + ;; + esac], + [pic_mode=m4_default([$1], [default])]) + +_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl +])# _LT_WITH_PIC + +LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) +LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) + +# Old name: +AU_DEFUN([AC_LIBTOOL_PICMODE], +[_LT_SET_OPTION([LT_INIT], [pic-only]) +AC_DIAGNOSE([obsolete], +[$0: Remove this warning and the call to _LT_SET_OPTION when you +put the 'pic-only' option into LT_INIT's first parameter.]) +]) + +dnl aclocal-1.4 backwards compatibility: +dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) + +## ----------------- ## +## LTDL_INIT Options ## +## ----------------- ## + +m4_define([_LTDL_MODE], []) +LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], + [m4_define([_LTDL_MODE], [nonrecursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [recursive], + [m4_define([_LTDL_MODE], [recursive])]) +LT_OPTION_DEFINE([LTDL_INIT], [subproject], + [m4_define([_LTDL_MODE], [subproject])]) + +m4_define([_LTDL_TYPE], []) +LT_OPTION_DEFINE([LTDL_INIT], [installable], + [m4_define([_LTDL_TYPE], [installable])]) +LT_OPTION_DEFINE([LTDL_INIT], [convenience], + [m4_define([_LTDL_TYPE], [convenience])]) diff --git a/3rd/pcre2/m4/ltsugar.m4 b/3rd/pcre2/m4/ltsugar.m4 new file mode 100644 index 00000000..902508bd --- /dev/null +++ b/3rd/pcre2/m4/ltsugar.m4 @@ -0,0 +1,124 @@ +# ltsugar.m4 -- libtool m4 base layer. 
-*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007-2008, 2011-2019, 2021-2022 Free Software +# Foundation, Inc. +# Written by Gary V. Vaughan, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 6 ltsugar.m4 + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) + + +# lt_join(SEP, ARG1, [ARG2...]) +# ----------------------------- +# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their +# associated separator. +# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier +# versions in m4sugar had bugs. +m4_define([lt_join], +[m4_if([$#], [1], [], + [$#], [2], [[$2]], + [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) +m4_define([_lt_join], +[m4_if([$#$2], [2], [], + [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) + + +# lt_car(LIST) +# lt_cdr(LIST) +# ------------ +# Manipulate m4 lists. +# These macros are necessary as long as will still need to support +# Autoconf-2.59, which quotes differently. +m4_define([lt_car], [[$1]]) +m4_define([lt_cdr], +[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], + [$#], 1, [], + [m4_dquote(m4_shift($@))])]) +m4_define([lt_unquote], $1) + + +# lt_append(MACRO-NAME, STRING, [SEPARATOR]) +# ------------------------------------------ +# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. +# Note that neither SEPARATOR nor STRING are expanded; they are appended +# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). +# No SEPARATOR is output if MACRO-NAME was previously undefined (different +# than defined and empty). +# +# This macro is needed until we can rely on Autoconf 2.62, since earlier +# versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
+m4_define([lt_append], +[m4_define([$1], + m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) + + + +# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) +# ---------------------------------------------------------- +# Produce a SEP delimited list of all paired combinations of elements of +# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list +# has the form PREFIXmINFIXSUFFIXn. +# Needed until we can rely on m4_combine added in Autoconf 2.62. +m4_define([lt_combine], +[m4_if(m4_eval([$# > 3]), [1], + [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl +[[m4_foreach([_Lt_prefix], [$2], + [m4_foreach([_Lt_suffix], + ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, + [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) + + +# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) +# ----------------------------------------------------------------------- +# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited +# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
+m4_define([lt_if_append_uniq], +[m4_ifdef([$1], + [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], + [lt_append([$1], [$2], [$3])$4], + [$5])], + [lt_append([$1], [$2], [$3])$4])]) + + +# lt_dict_add(DICT, KEY, VALUE) +# ----------------------------- +m4_define([lt_dict_add], +[m4_define([$1($2)], [$3])]) + + +# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) +# -------------------------------------------- +m4_define([lt_dict_add_subkey], +[m4_define([$1($2:$3)], [$4])]) + + +# lt_dict_fetch(DICT, KEY, [SUBKEY]) +# ---------------------------------- +m4_define([lt_dict_fetch], +[m4_ifval([$3], + m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), + m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) + + +# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) +# ----------------------------------------------------------------- +m4_define([lt_if_dict_fetch], +[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], + [$5], + [$6])]) + + +# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) +# -------------------------------------------------------------- +m4_define([lt_dict_filter], +[m4_if([$5], [], [], + [lt_join(m4_quote(m4_default([$4], [[, ]])), + lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), + [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl +]) diff --git a/3rd/pcre2/m4/ltversion.m4 b/3rd/pcre2/m4/ltversion.m4 new file mode 100644 index 00000000..b155d0ac --- /dev/null +++ b/3rd/pcre2/m4/ltversion.m4 @@ -0,0 +1,24 @@ +# ltversion.m4 -- version numbers -*- Autoconf -*- +# +# Copyright (C) 2004, 2011-2019, 2021-2022 Free Software Foundation, +# Inc. +# Written by Scott James Remnant, 2004 +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. 
+ +# @configure_input@ + +# serial 4245 ltversion.m4 +# This file is part of GNU Libtool + +m4_define([LT_PACKAGE_VERSION], [2.4.7]) +m4_define([LT_PACKAGE_REVISION], [2.4.7]) + +AC_DEFUN([LTVERSION_VERSION], +[macro_version='2.4.7' +macro_revision='2.4.7' +_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) +_LT_DECL(, macro_revision, 0) +]) diff --git a/3rd/pcre2/m4/lt~obsolete.m4 b/3rd/pcre2/m4/lt~obsolete.m4 new file mode 100644 index 00000000..0f7a8759 --- /dev/null +++ b/3rd/pcre2/m4/lt~obsolete.m4 @@ -0,0 +1,99 @@ +# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- +# +# Copyright (C) 2004-2005, 2007, 2009, 2011-2019, 2021-2022 Free +# Software Foundation, Inc. +# Written by Scott James Remnant, 2004. +# +# This file is free software; the Free Software Foundation gives +# unlimited permission to copy and/or distribute it, with or without +# modifications, as long as this notice is preserved. + +# serial 5 lt~obsolete.m4 + +# These exist entirely to fool aclocal when bootstrapping libtool. +# +# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), +# which have later been changed to m4_define as they aren't part of the +# exported API, or moved to Autoconf or Automake where they belong. +# +# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN +# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us +# using a macro with the same name in our local m4/libtool.m4 it'll +# pull the old libtool.m4 in (it doesn't see our shiny new m4_define +# and doesn't know about Autoconf macros at all.) +# +# So we provide this file, which has a silly filename so it's always +# included after everything else. This provides aclocal with the +# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything +# because those macros already exist, or will be overwritten later. +# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
+# +# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. +# Yes, that means every name once taken will need to remain here until +# we give up compatibility with versions before 1.7, at which point +# we need to keep only those names which we still refer to. + +# This is to help aclocal find these macros, as it can't see m4_define. +AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) + +m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) +m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) +m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) +m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) +m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) +m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) +m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) +m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) +m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) +m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) +m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) +m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) +m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) +m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) +m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) +m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) +m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) +m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) +m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) +m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) +m4_ifndef([AC_DEPLIBS_CHECK_METHOD], 
[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) +m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) +m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) +m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) +m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) +m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) +m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) +m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) +m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) +m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) +m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) +m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) +m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) +m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) +m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) +m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) +m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) +m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) +m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) +m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) +m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) +m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) +m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 
+m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) +m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) +m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) +m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) +m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) +m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) +m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) +m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) +m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) +m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) +m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) +m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) diff --git a/3rd/pcre2/m4/pcre2_visibility.m4 b/3rd/pcre2/m4/pcre2_visibility.m4 new file mode 100644 index 00000000..03f4fba6 --- /dev/null +++ b/3rd/pcre2/m4/pcre2_visibility.m4 @@ -0,0 +1,85 @@ +# visibility.m4 serial 4 (gettext-0.18.2) +dnl Copyright (C) 2005, 2008, 2010-2011 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +dnl Originally From Bruno Haible. + +dnl Tests whether the compiler supports the command-line option +dnl -fvisibility=hidden and the function attribute +dnl __attribute__((__visibility__("default"))). +dnl +dnl Set the variable VISIBILITY_CFLAGS. +dnl Defines and sets the variable HAVE_VISIBILITY. +dnl Defines and sets the variable WORKING_WERROR. + +dnl Modified to fit with PCRE build environment by Cristian Rodríguez. +dnl Adjusted for PCRE2 by PH. +dnl Refactored to work with non GCC (but compatible) compilers. 
+ +AC_DEFUN([PCRE2_VISIBILITY], +[ + AC_REQUIRE([AC_PROG_CC]) + VISIBILITY_CFLAGS= + HAVE_VISIBILITY=0 + dnl First, check whether -Werror can be added to the command line, or + dnl whether it leads to an error because of some other option that the + dnl user has put into $CC $CFLAGS $CPPFLAGS. + AC_MSG_CHECKING([whether the -Werror option is usable]) + AC_CACHE_VAL([pcre2_cv_cc_vis_werror], [ + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + pcre2_cv_cc_vis_werror=no + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[]])], + [ + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[]], [[ #warning e ]])], + [], [pcre2_cv_cc_vis_werror=yes] + ) + ], []) + CFLAGS="$pcre2_save_CFLAGS"]) + AC_MSG_RESULT([$pcre2_cv_cc_vis_werror]) + if test -n "$pcre2_cv_cc_vis_werror" && test $pcre2_cv_cc_vis_werror = yes + then + WORKING_WERROR=1 + else + WORKING_WERROR=0 + fi + if test $pcre2_cv_cc_vis_werror = yes; then + dnl Now check whether GCC compatible visibility declarations are supported. + AC_MSG_CHECKING([for GCC compatible visibility declarations]) + AC_CACHE_VAL([pcre2_cv_cc_visibility], [ + pcre2_save_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -Werror -fvisibility=hidden" + dnl We use the option -Werror and a function dummyfunc, because on some + dnl platforms (Cygwin 1.7) the use of -fvisibility triggers a warning + dnl "visibility attribute not supported in this configuration; ignored" + dnl at the first function definition in every compilation unit, and we + dnl don't want to use the option in this case. 
+ AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[extern __attribute__((__visibility__("hidden"))) int hiddenfunc (void); + extern __attribute__((__visibility__("default"))) int exportedfunc (void); + void dummyfunc (void) {} + ]], + [[]])], + [pcre2_cv_cc_visibility=yes], + [pcre2_cv_cc_visibility=no]) + CFLAGS="$pcre2_save_CFLAGS"]) + AC_MSG_RESULT([$pcre2_cv_cc_visibility]) + fi + if test -n "$pcre2_cv_cc_visibility" && test $pcre2_cv_cc_visibility = yes + then + VISIBILITY_CFLAGS="-fvisibility=hidden" + HAVE_VISIBILITY=1 + AC_DEFINE(PCRE2_EXPORT, [__attribute__ ((visibility ("default")))], [to make a symbol visible]) + else + AC_DEFINE(PCRE2_EXPORT, [], [to make a symbol visible]) + fi + AC_SUBST([VISIBILITY_CFLAGS]) + AC_SUBST([HAVE_VISIBILITY]) + AC_DEFINE_UNQUOTED([HAVE_VISIBILITY], [$HAVE_VISIBILITY], + [Define to 1 if the compiler supports GCC compatible visibility declarations.]) +]) diff --git a/3rd/pcre2/maint/.gitignore b/3rd/pcre2/maint/.gitignore new file mode 100644 index 00000000..88462b18 --- /dev/null +++ b/3rd/pcre2/maint/.gitignore @@ -0,0 +1,9 @@ +ucptest +utf8 + +pcre2_ucp.h +pcre2_ucptables.c +pcre2_ucd.c + +testinput +testoutput diff --git a/3rd/pcre2/maint/132html b/3rd/pcre2/maint/132html new file mode 100644 index 00000000..8d05e9d5 --- /dev/null +++ b/3rd/pcre2/maint/132html @@ -0,0 +1,318 @@ +#! /usr/bin/perl -w + +# Script to turn PCRE2 man pages into HTML + + +# Subroutine to handle font changes and other escapes + +sub do_line { +my($s) = $_[0]; + +$s =~ s/ +$s =~ s/>/>/g; +$s =~ s"\\fI(.*?)\\f[RP]"$1"g; +$s =~ s"\\fB(.*?)\\f[RP]"$1"g; +$s =~ s"\\e"\\"g; +$s =~ s/(?<=Copyright )\(c\)/©/g; +$s =~ s/\\&//g; # Deal with the \& 0-width space +$s; +} + +# Subroutine to ensure not in a paragraph + +sub end_para { +if ($inpara) + { + print TEMP "
\n" if ($inpre); + print TEMP "

\n"; + } +$inpara = $inpre = 0; +$wrotetext = 0; +} + +# Subroutine to start a new paragraph + +sub new_para { +&end_para(); +print TEMP "

\n"; +$inpara = 1; +} + + +# Main program + +$innf = 0; +$inpara = 0; +$inpre = 0; +$wrotetext = 0; +$toc = 0; +$ref = 1; + +while ($#ARGV >= 0 && $ARGV[0] =~ /^-/) + { + $toc = 1 if $ARGV[0] eq "-toc"; + shift; + } + +# Initial output to STDOUT + +print < + +$ARGV[0] specification + + +

$ARGV[0] man page

+

+Return to the PCRE2 index page. +

+

+This page is part of the PCRE2 HTML documentation. It was generated +automatically from the original man page. If there is any nonsense in it, +please consult the man page, in case the conversion went wrong. +
+End + +print "

    \n" if ($toc); + +open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n"; + +while () + { + # Handle lines beginning with a dot + + if (/^\./) + { + # Some of the PCRE2 man pages used to contain instances of .br. However, + # they should have all been removed because they cause trouble in some + # (other) automated systems that translate man pages to HTML. Complain if + # we find .br or .in (another macro that is deprecated). + + if (/^\.br/ || /^\.in/) + { + print STDERR "\n*** Deprecated macro encountered - rewrite needed\n"; + print STDERR "*** $_\n"; + die "*** Processing abandoned\n"; + } + + # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi. + + elsif (/^\.nf/) + { + $innf = 1; + } + + elsif (/^\.fi/) + { + $innf = 0; + } + + # Handling .sp is subtle. If it is inside a literal section, do nothing if + # the next line is a non literal text line; similarly, if not inside a + # literal section, do nothing if a literal follows, unless we are inside + # a .nf/.fi section or about to enter one. The point being that the
    +    # and 
    that delimit literal sections will do the spacing. Always skip + # if no previous output. + + elsif (/^\.sp/) + { + if ($wrotetext) + { + $_ = ; + if ($inpre) + { + print TEMP "\n" if (/^[\s.]/); + } + else + { + print TEMP "
    \n
    \n" if ($innf || /^\.nf/ || !/^[\s.]/); + } + redo; # Now process the lookahead line we just read + } + } + elsif (/^\.TP/ || /^\.PP/ || /^\.P/) + { + &new_para(); + } + elsif (/^\.SH\s*("?)(.*)\1/) + { + # Ignore the NAME section + if ($2 =~ /^NAME\b/) + { + ; + next; + } + + &end_para(); + my($title) = &do_line($2); + if ($toc) + { + printf("
  • $title\n", + $ref, $ref); + printf TEMP ("
    $title
    \n", + $ref); + $ref++; + } + else + { + print TEMP "
    \n$title\n
    \n"; + } + } + elsif (/^\.SS\s*("?)(.*)\1/) + { + &end_para(); + my($title) = &do_line($2); + print TEMP "
    \n$title\n
    \n"; + } + elsif (/^\.B\s*(.*)/) + { + &new_para() if (!$inpara); + $_ = &do_line($1); + s/"(.*?)"/$1/g; + print TEMP "$_\n"; + $wrotetext = 1; + } + elsif (/^\.I\s*(.*)/) + { + &new_para() if (!$inpara); + $_ = &do_line($1); + s/"(.*?)"/$1/g; + print TEMP "$_\n"; + $wrotetext = 1; + } + + # Remove the "AUTOMATICALLY GENERATED" warning from pcre2demo.3 + elsif (/^\.\\"AUTOMATICALLY GENERATED/) { next; } + + # A comment that starts "HREF" takes the next line as a name that + # is turned into a hyperlink, using the text given, which might be + # in a special font. If it ends in () or (digits) or punctuation, they + # aren't part of the link. + + elsif (/^\.\\"\s*HREF/) + { + $_=; + chomp; + $_ = &do_line($_); + $_ =~ s/\s+$//; + $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/; + print TEMP "$_\n"; + } + + # A comment that starts "HTML" inserts literal HTML + + elsif (/^\.\\"\s*HTML\s*(.*)/) + { + print TEMP $1; + } + + # A comment that starts < inserts that HTML at the end of the + # *next* input line - so as not to get a newline between them. + + elsif (/^\.\\"\s*(<.*>)/) + { + my($markup) = $1; + $_=; + chomp; + $_ = &do_line($_); + $_ =~ s/\s+$//; + print TEMP "$_$markup\n"; + } + + # A comment that starts JOIN joins the next two lines together, with one + # space between them. Then that line is processed. This is used in some + # displays where two lines are needed for the "man" version. JOINSH works + # the same, except that it assumes this is a shell command, so removes + # continuation backslashes. + + elsif (/^\.\\"\s*JOIN(SH)?/) + { + my($one,$two); + $one = ; + $two = ; + $one =~ s/\s*\\e\s*$// if (defined($1)); + chomp($one); + $two =~ s/^\s+//; + $_ = "$one $two"; + redo; # Process the joined lines + } + + # .EX/.EE are used in the pcre2demo page to bracket the entire program, + # which is unmodified except for turning backslash into "\e". + + elsif (/^\.EX\s*$/) + { + print TEMP "
    \n";
    +      while ()
    +        {
    +        last if /^\.EE\s*$/;
    +        s/\\e/\\/g;
    +        s/&/&/g;
    +        s//>/g;
    +        print TEMP;
    +        }
    +      }
    +
    +    # Ignore anything not recognized
    +
    +    next;
    +    }
    +
    +  # Line does not begin with a dot. Replace blank lines with new paragraphs
    +
    +  if (/^\s*$/)
    +    {
    +    &end_para() if ($wrotetext);
    +    next;
    +    }
    +
    +  # Convert fonts changes and output an ordinary line. Ensure that indented
    +  # lines are marked as literal.
    +
    +  $_ = &do_line($_);
    +  &new_para() if (!$inpara);
    +
    +  if (/^\s/)
    +    {
    +    if (!$inpre)
    +      {
    +      print TEMP "
    \n";
    +      $inpre = 1;
    +      }
    +    }
    +  elsif ($inpre)
    +    {
    +    print TEMP "
    \n"; + $inpre = 0; + } + + # Add
    to the end of a non-literal line if we are within .nf/.fi + + $_ .= "
    \n" if (!$inpre && $innf); + + print TEMP; + $wrotetext = 1; + } + +# The TOC, if present, will have been written - terminate it + +print "
\n" if ($toc); + +# Copy the remainder to the standard output + +close(TEMP); +open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n"; + +print while (); + +print < +Return to the PCRE2 index page. +

+End + +close(TEMP); +unlink("/tmp/$$"); + +# End diff --git a/3rd/pcre2/maint/CheckMan b/3rd/pcre2/maint/CheckMan new file mode 100644 index 00000000..04e68019 --- /dev/null +++ b/3rd/pcre2/maint/CheckMan @@ -0,0 +1,78 @@ +#! /usr/bin/perl + +# A script to scan PCRE2's man pages to check for typos in the control +# sequences. I use only a small set of the available repertoire, so it is +# straightforward to check that nothing else has slipped in by mistake. This +# script should be called in the doc directory. + +$yield = 0; + +while (scalar(@ARGV) > 0) + { + $line = 0; + $file = shift @ARGV; + + open (IN, $file) || die "Failed to open $file\n"; + + while () + { + $count = 0; + $line++; + if (/^\s*$/) + { + printf "Empty line $line of $file\n"; + $yield = 1; + } + elsif (/^\./) + { + if (!/^\.\s*$| + ^\.B\s+\S| + ^\.TH\s\S| + ^\.SH\s\S| + ^\.SS\s\S| + ^\.TP(?:\s?\d+)?\s*$| + ^\.SM\s*$| + ^\.br\s*$| + ^\.rs\s*$| + ^\.sp\s*$| + ^\.nf\s*$| + ^\.fi\s*$| + ^\.P\s*$| + ^\.PP\s*$| + ^\.\\"(?:\ HREF)?\s*$| + ^\.\\"\sHTML\s\s*$| + ^\.\\"\sHTML\s<\/a>\s*$| + ^\.\\"\s<\/a>\s*$| + ^\.\\"\sJOINSH\s*$| + ^\.\\"\sJOIN\s*$/x + ) + { + printf "Bad control line $line of $file\n"; + $yield = 1; + } + } + elsif (/\\[^ef&]|\\f[^IBP]/) + { + printf "Bad backslash in line $line of $file\n"; + $yield = 1; + } + while (/\\f[BI]/g) + { + $count++; + } + while (/\\fP/g) + { + $count--; + } + if ($count != 0) + { + printf "Mismatching formatting in line $line of $file\n"; + $yield = 1; + } + } + + close(IN); + } + +exit $yield; +# End diff --git a/3rd/pcre2/maint/CheckTxt b/3rd/pcre2/maint/CheckTxt new file mode 100644 index 00000000..c6f9e8c7 --- /dev/null +++ b/3rd/pcre2/maint/CheckTxt @@ -0,0 +1,64 @@ +#! /usr/bin/perl + +# This is a script for checking whether a file contains any carriage return +# characters, and whether it is valid UTF-8. + +use Encode; + +# This subroutine does the work for one file. 
+ +$yield = 0; +$ascii = 0; # bool +$crlf = 0; # bool + +sub checktxt { +my($file) = $_[0]; +open(IN, "<:raw", "$file") || die "Can't open $file for input"; +$bin = do { local $/ = undef; }; +close(IN); +my $data; +eval + { + $data = Encode::decode("UTF-8", $bin, Encode::FB_CROAK); + 1; # return true + } +or do + { + printf "Bad UTF-8 in $file\n"; + $yield = 1; + return; + }; +if (!$crlf && index($data, "\r") != -1) + { + printf "CR in $file\n"; + $yield = 1; + } +if ($ascii && $data =~ /[^\x01-\x7e]/) + { + printf "Non-ASCII in $file\n"; + $yield = 1; + } +} + +# This is the main program + +$, = ""; # Output field separator +for ($i = 0; $i < @ARGV; $i++) + { + if ($ARGV[$i] eq "-ascii") + { + $ascii = 1; + } + elsif ($ARGV[$i] eq "-crlf") + { + $crlf = 1; + } + else + { + checktxt($ARGV[$i]); + } + } + +exit $yield; + +# End diff --git a/3rd/pcre2/maint/CleanTxt b/3rd/pcre2/maint/CleanTxt new file mode 100644 index 00000000..1f42519c --- /dev/null +++ b/3rd/pcre2/maint/CleanTxt @@ -0,0 +1,113 @@ +#! /usr/bin/perl -w + +# Script to take the output of nroff -man and remove all the backspacing and +# the page footers and the screen commands etc so that it is more usefully +# readable online. In fact, in the latest nroff, intermediate footers don't +# seem to be generated any more. + +$blankcount = 0; +$lastwascut = 0; +$firstheader = 1; + +# Input on STDIN; output to STDOUT. + +while () + { + s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" + s/.\x8//g; # Remove "char, backspace" + + # Handle header lines. Retain only the first one we encounter, but remove + # the blank line that follows. Any others (e.g. at end of document) and the + # following blank line are dropped. 
+ + if (/^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) + { + if ($firstheader) + { + $firstheader = 0; + print; + $lastprinted = $_; + $lastwascut = 0; + } + $_=; # Remove a blank that follows + next; + } + + # Count runs of empty lines + + if (/^\s*$/) + { + $blankcount++; + $lastwascut = 0; + next; + } + + # If a chunk of lines has been cut out (page footer) and the next line + # has a different indentation, put back one blank line. + + if ($lastwascut && $blankcount < 1 && defined($lastprinted)) + { + ($a) = $lastprinted =~ /^(\s*)/; + ($b) = $_ =~ /^(\s*)/; + $blankcount++ if ($a ne $b); + } + + # We get here only when we have a non-blank line in hand. If it was preceded + # by 3 or more blank lines, read the next 3 lines and see if they are blank. + # If so, remove all 7 lines, and remember that we have just done a cut. + + if ($blankcount >= 3) + { + for ($i = 0; $i < 3; $i++) + { + $next[$i] = ; + $next[$i] = "" if !defined $next[$i]; + $next[$i] =~ s/\x1b\[\d+m//g; # Remove screen controls "ESC [ number m" + $next[$i] =~ s/.\x8//g; # Remove "char, backspace" + } + + # Cut out chunks of the form <3 blanks><3 blanks> + + if ($next[0] =~ /^\s*$/ && + $next[1] =~ /^\s*$/ && + $next[2] =~ /^\s*$/) + { + $blankcount -= 3; + $lastwascut = 1; + } + + # Otherwise output the saved blanks, the current, and the next three + # lines. Remember the last printed line. + + else + { + for ($i = 0; $i < $blankcount; $i++) { print "\n"; } + print; + for ($i = 0; $i < 3; $i++) + { + $next[$i] =~ s/.\x8//g; + print $next[$i]; + $lastprinted = $_; + } + $lastwascut = 0; + $blankcount = 0; + } + } + + # This non-blank line is not preceded by 3 or more blank lines. Output + # any blanks there are, and the line. Remember it. Force two blank lines + # before headings. 
+ + else + { + $blankcount = 2 if /^\S/ && !/^Last updated/ && !/^Copyright/ && + defined($lastprinted); + for ($i = 0; $i < $blankcount; $i++) { print "\n"; } + print; + $lastprinted = $_; + $lastwascut = 0; + $blankcount = 0; + } + } + +# End diff --git a/3rd/pcre2/maint/Detrail b/3rd/pcre2/maint/Detrail new file mode 100644 index 00000000..85eb3d1b --- /dev/null +++ b/3rd/pcre2/maint/Detrail @@ -0,0 +1,35 @@ +#! /usr/bin/perl + +# This is a script for removing trailing whitespace from lines in files that +# are listed on the command line. + +# This subroutine does the work for one file. + +sub detrail { +my($file) = $_[0]; +my($changed) = 0; +open(IN, "<", "$file") || die "Can't open $file for input"; +@lines = ; +close(IN); +foreach (@lines) + { + if (/\s+\n$/) + { + s/\s+\n$/\n/; + $changed = 1; + } + } +if ($changed) + { + open(OUT, ">", "$file") || die "Can't open $file for output"; + print OUT @lines; + close(OUT); + } +} + +# This is the main program + +$, = ""; # Output field separator +for ($i = 0; $i < @ARGV; $i++) { &detrail($ARGV[$i]); } + +# End diff --git a/3rd/pcre2/maint/GenerateCommon.py b/3rd/pcre2/maint/GenerateCommon.py new file mode 100644 index 00000000..9451c6ec --- /dev/null +++ b/3rd/pcre2/maint/GenerateCommon.py @@ -0,0 +1,354 @@ +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ + +# This file is a Python module containing common lists and functions for the +# GenerateXXX scripts that create various.c and .h files from Unicode data +# files. It was created as part of a re-organizaton of these scripts in +# December 2021. + + +import re + + +# --------------------------------------------------------------------------- +# DATA LISTS +# --------------------------------------------------------------------------- + +# BIDI classes in the DerivedBidiClass.txt file, short and long identifiers. 
+ +bidi_classes = [ + 'AL', 'Arabic_Letter', + 'AN', 'Arabic_Number', + 'B', 'Paragraph_Separator', + 'BN', 'Boundary_Neutral', + 'CS', 'Common_Separator', + 'EN', 'European_Number', + 'ES', 'European_Separator', + 'ET', 'European_Terminator', + 'FSI', 'First_Strong_Isolate', + 'L', 'Left_To_Right', + 'LRE', 'Left_To_Right_Embedding', + 'LRI', 'Left_To_Right_Isolate', + 'LRO', 'Left_To_Right_Override', + 'NSM', 'Nonspacing_Mark', + 'ON', 'Other_Neutral', + 'PDF', 'Pop_Directional_Format', + 'PDI', 'Pop_Directional_Isolate', + 'R', 'Right_To_Left', + 'RLE', 'Right_To_Left_Embedding', + 'RLI', 'Right_To_Left_Isolate', + 'RLO', 'Right_To_Left_Override', + 'S', 'Segment_Separator', + 'WS', 'White_Space' + ] + +# Particular category property names, with comments. NOTE: If ever this list +# is changed, the table called "catposstab" in the pcre2_auto_possess.c file +# must be edited to keep in step. + +category_names = [ + 'Cc', 'Control', + 'Cf', 'Format', + 'Cn', 'Unassigned', + 'Co', 'Private use', + 'Cs', 'Surrogate', + 'Ll', 'Lower case letter', + 'Lm', 'Modifier letter', + 'Lo', 'Other letter', + 'Lt', 'Title case letter', + 'Lu', 'Upper case letter', + 'Mc', 'Spacing mark', + 'Me', 'Enclosing mark', + 'Mn', 'Non-spacing mark', + 'Nd', 'Decimal number', + 'Nl', 'Letter number', + 'No', 'Other number', + 'Pc', 'Connector punctuation', + 'Pd', 'Dash punctuation', + 'Pe', 'Close punctuation', + 'Pf', 'Final punctuation', + 'Pi', 'Initial punctuation', + 'Po', 'Other punctuation', + 'Ps', 'Open punctuation', + 'Sc', 'Currency symbol', + 'Sk', 'Modifier symbol', + 'Sm', 'Mathematical symbol', + 'So', 'Other symbol', + 'Zl', 'Line separator', + 'Zp', 'Paragraph separator', + 'Zs', 'Space separator' + ] + +# The Extended_Pictographic property is not found in the file where all the +# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt +# file, but we list it here so that the name has the correct index value. 
+ +break_properties = [ + 'CR', ' 0', + 'LF', ' 1', + 'Control', ' 2', + 'Extend', ' 3', + 'Prepend', ' 4', + 'SpacingMark', ' 5', + 'L', ' 6 Hangul syllable type L', + 'V', ' 7 Hangul syllable type V', + 'T', ' 8 Hangul syllable type T', + 'LV', ' 9 Hangul syllable type LV', + 'LVT', '10 Hangul syllable type LVT', + 'Regional_Indicator', '11', + 'Other', '12', + 'ZWJ', '13', + 'Extended_Pictographic', '14' + ] + +# List of files from which the names of Boolean properties are obtained, along +# with a list of regex patterns for properties to be ignored, and a list of +# extra pattern names to add. + +bool_propsfiles = ['PropList.txt', 'DerivedCoreProperties.txt', 'emoji-data.txt'] +bool_propsignore = [r'^Other_', r'^Hyphen$'] +bool_propsextras = ['ASCII', 'Bidi_Mirrored'] + + +# --------------------------------------------------------------------------- +# GET BOOLEAN PROPERTY NAMES +# --------------------------------------------------------------------------- + +# Get a list of Boolean property names from a number of files. 
+ +def getbpropslist(): + bplist = [] + bplast = "" + + for filename in bool_propsfiles: + try: + file = open('Unicode.tables/' + filename, 'r') + except IOError: + print(f"** Couldn't open {'Unicode.tables/' + filename}\n") + sys.exit(1) + + for line in file: + line = re.sub(r'#.*', '', line) + data = list(map(str.strip, line.split(';'))) + if len(data) <= 1 or data[1] == bplast: + continue + bplast = data[1] + for pat in bool_propsignore: + if re.match(pat, bplast) != None: + break + else: + if bplast not in bplist: + bplist.append(bplast) + + file.close() + + bplist.extend(bool_propsextras) + bplist.sort() + return bplist + +bool_properties = getbpropslist() +bool_props_list_item_size = (len(bool_properties) + 31) // 32 + + + +# --------------------------------------------------------------------------- +# COLLECTING PROPERTY NAMES AND ALIASES +# --------------------------------------------------------------------------- + +script_names = ['Unknown'] +abbreviations = {} + +def collect_property_names(): + global script_names + global abbreviations + + names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? 
+; ([A-Za-z_]+) #') + + last_script_name = "" + with open("Unicode.tables/Scripts.txt") as f: + for line in f: + match_obj = names_re.match(line) + + if match_obj == None or match_obj.group(1) == last_script_name: + continue + + last_script_name = match_obj.group(1) + script_names.append(last_script_name) + + # Sometimes there is comment in the line + # so splitting around semicolon is not enough + value_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_ ]+))?') + + with open("Unicode.tables/PropertyValueAliases.txt") as f: + for line in f: + match_obj = value_alias_re.match(line) + + if match_obj == None: + continue + + if match_obj.group(1) == "sc": + if match_obj.group(2) == match_obj.group(3): + abbreviations[match_obj.group(3)] = () + elif match_obj.group(4) == None: + abbreviations[match_obj.group(3)] = (match_obj.group(2),) + else: + abbreviations[match_obj.group(3)] = (match_obj.group(2), match_obj.group(4)) + + # We can also collect Boolean property abbreviations into the same dictionary + + bin_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_]+))?') + with open("Unicode.tables/PropertyAliases.txt") as f: + for line in f: + match_obj = bin_alias_re.match(line) + if match_obj == None: + continue + + if match_obj.group(2) != match_obj.group(1) and match_obj.group(2) in bool_properties: + if match_obj.group(3) == None: + abbreviations[match_obj.group(2)] = (match_obj.group(1),) + else: + abbreviations[match_obj.group(2)] = (match_obj.group(1), match_obj.group(3)) + +collect_property_names() + + + +# --------------------------------------------------------------------------- +# REORDERING SCRIPT NAMES +# --------------------------------------------------------------------------- + +script_abbrevs = [] + +def reorder_scripts(): + global script_names + global script_abbrevs + global abbreviations + + for name in script_names: + abbrevs = abbreviations[name] + script_abbrevs.append(name if 
len(abbrevs) == 0 else abbrevs[0]) + + extended_script_abbrevs = set() + with open("Unicode.tables/ScriptExtensions.txt") as f: + names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+[A-Za-z]) +#') + + for line in f: + match_obj = names_re.match(line) + + if match_obj == None: + continue + + for name in match_obj.group(1).split(" "): + extended_script_abbrevs.add(name) + + new_script_names = [] + new_script_abbrevs = [] + + for idx, abbrev in enumerate(script_abbrevs): + if abbrev in extended_script_abbrevs: + new_script_names.append(script_names[idx]) + new_script_abbrevs.append(abbrev) + + for idx, abbrev in enumerate(script_abbrevs): + if abbrev not in extended_script_abbrevs: + new_script_names.append(script_names[idx]) + new_script_abbrevs.append(abbrev) + + script_names = new_script_names + script_abbrevs = new_script_abbrevs + +reorder_scripts() +script_list_item_size = (script_names.index('Unknown') + 31) // 32 + + +# --------------------------------------------------------------------------- +# DERIVED LISTS +# --------------------------------------------------------------------------- + +# Create general character property names from the first letters of the +# particular categories. + +gcn_set = set(category_names[i][0] for i in range(0, len(category_names), 2)) +general_category_names = list(gcn_set) +general_category_names.sort() + + +# --------------------------------------------------------------------------- +# FUNCTIONS +# --------------------------------------------------------------------------- + +import sys + +# Open an output file, using the command's argument or a default. Write common +# preliminary header information. 
+ +def open_output(default): + if len(sys.argv) > 2: + print('** Too many arguments: just give a file name') + sys.exit(1) + if len(sys.argv) == 2: + output_name = sys.argv[1] + else: + output_name = default + try: + file = open(output_name, "w") + except IOError: + print("** Couldn't open %s" % output_name) + sys.exit(1) + + script_name = sys.argv[0] + i = script_name.rfind('/') + if i >= 0: + script_name = script_name[i+1:] + + file.write("""\ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +""") + + file.write("Instead, modify the maint/%s script and run it to generate\n" + "a new version of this code.\n\n" % script_name) + + file.write("""\ +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ +\n""") + return file + +# End of UcpCommon.py diff --git a/3rd/pcre2/maint/GenerateTest.py b/3rd/pcre2/maint/GenerateTest.py new file mode 100644 index 00000000..1b36a35e --- /dev/null +++ b/3rd/pcre2/maint/GenerateTest.py @@ -0,0 +1,186 @@ +#! /usr/bin/env python3 + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ +# +# This file auto-generates Unicode property tests and their expected output. +# It is recommended to re-run this generator after the Unicode files are +# updated. The names of the generated files are `testinput` and `testoutput` +# and should be copied over to replace either test26 or test27 files. 
+ +import re +import sys + +from GenerateCommon import \ + script_names, \ + script_abbrevs + +def write_both(text): + input_file.write(text) + output_file.write(text) + +def to_string_char(ch_idx): + if ch_idx < 128: + if ch_idx < 16: + return "\\x{0%x}" % ch_idx + if ch_idx >= 32: + return chr(ch_idx) + return "\\x{%x}" % ch_idx + +try: + input_file = open("testinput", "w") + output_file = open("testoutput", "w") +except IOError: + print("** Couldn't create output files") + sys.exit(1) + +write_both("# These tests were generated by maint/GenerateTest.py using PCRE2's UCP\n"); +write_both("# data, do not edit unless that data has changed and they are reflecting\n"); +write_both("# a previous version.\n\n"); + +# --------------------------------------------------------------------------- +# UNICODE SCRIPT EXTENSION TESTS +# --------------------------------------------------------------------------- + + +def gen_script_tests(): + script_data = [None] * len(script_names) + char_data = [None] * 0x110000 + + property_re = re.compile(r"^([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))? 
+; ([A-Za-z_ ]+[A-Za-z]) +#") + prev_name = "" + script_idx = -1 + + with open("Unicode.tables/Scripts.txt") as f: + version_pat = r"^# Scripts-(\d+\.\d+\.\d+)\.txt$" + v = re.match(version_pat, f.readline()) + unicode_version = v.group(1) + + write_both("# Unicode Script Extension tests for version " + unicode_version + "\n\n") + write_both("#perltest\n\n") + + for line in f: + match_obj = property_re.match(line) + + if match_obj == None: + continue + + name = match_obj.group(3) + if name != prev_name: + script_idx = script_names.index(name) + prev_name = name + + low = int(match_obj.group(1), 16) + high = low + char_data[low] = name + + if match_obj.group(2) != None: + high = int(match_obj.group(2), 16) + for idx in range(low + 1, high + 1): + char_data[idx] = name + + if script_data[script_idx] == None: + script_data[script_idx] = [low, None, None, None, None] + script_data[script_idx][1] = high + + extended_script_indicies = {} + + with open("Unicode.tables/ScriptExtensions.txt") as f: + for line in f: + match_obj = property_re.match(line) + + if match_obj == None: + continue + + low = int(match_obj.group(1), 16) + high = low + if match_obj.group(2) != None: + high = int(match_obj.group(2), 16) + + for abbrev in match_obj.group(3).split(" "): + if abbrev not in extended_script_indicies: + idx = script_abbrevs.index(abbrev) + extended_script_indicies[abbrev] = idx + rec = script_data[idx] + rec[2] = low + rec[3] = high + else: + idx = extended_script_indicies[abbrev] + rec = script_data[idx] + if rec[2] > low: + rec[2] = low + if rec[3] < high: + rec[3] = high + + if rec[4] == None: + name = script_names[idx] + for idx in range(low, high + 1): + if char_data[idx] != name: + rec[4] = idx + break + + long_property_name = False + + for idx, rec in enumerate(script_data): + script_name = script_names[idx] + + if script_name == "Unknown": + continue + + script_abbrev = script_abbrevs[idx] + + write_both("# Base script check\n") + write_both("/^\\p{sc=%s}/utf\n" % 
script_name) + write_both(" %s\n" % to_string_char(rec[0])) + output_file.write(" 0: %s\n" % to_string_char(rec[0])) + write_both("\n") + + write_both("/^\\p{Script=%s}/utf\n" % script_abbrev) + write_both(" %s\n" % to_string_char(rec[1])) + output_file.write(" 0: %s\n" % to_string_char(rec[1])) + write_both("\n") + + if rec[2] != None: + property_name = "scx" + if long_property_name: + property_name = "Script_Extensions" + + write_both("# Script extension check\n") + write_both("/^\\p{%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[2])) + output_file.write(" 0: %s\n" % to_string_char(rec[2])) + write_both("\n") + + write_both("/^\\p{%s=%s}/utf\n" % (property_name, script_abbrev)) + write_both(" %s\n" % to_string_char(rec[3])) + output_file.write(" 0: %s\n" % to_string_char(rec[3])) + write_both("\n") + + long_property_name = not long_property_name + + if rec[4] != None: + write_both("# Script extension only character\n") + write_both("/^\\p{%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[4])) + output_file.write(" 0: %s\n" % to_string_char(rec[4])) + write_both("\n") + + write_both("/^\\p{sc=%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[4])) + output_file.write("No match\n") + write_both("\n") + else: + print("External character has not found for %s" % script_name) + + high = rec[1] + if rec[3] != None and rec[3] > rec[1]: + high = rec[3] + write_both("# Character not in script\n") + write_both("/^\\p{%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(high + 1)) + output_file.write("No match\n") + write_both("\n") + +gen_script_tests() + +write_both("# End of test\n") diff --git a/3rd/pcre2/maint/GenerateUcd.py b/3rd/pcre2/maint/GenerateUcd.py new file mode 100644 index 00000000..ed09c7f1 --- /dev/null +++ b/3rd/pcre2/maint/GenerateUcd.py @@ -0,0 +1,1002 @@ +#! 
/usr/bin/env python3 + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ +# +# This script generates the pcre2_ucd.c file from Unicode data files. This is +# the compressed Unicode property data used by PCRE2. The script was created in +# December 2021 as part of the Unicode data generation refactoring. It is +# basically a re-working of the MultiStage2.py script that was submitted to the +# PCRE project by Peter Kankowski in 2008 as part of a previous upgrading of +# Unicode property support. A number of extensions have since been added. The +# main difference in the 2021 upgrade (apart from comments and layout) is that +# the data tables (e.g. list of script names) are now listed in or generated by +# a separate Python module that is shared with the other Generate scripts. +# +# This script must be run in the "maint" directory. It requires the following +# Unicode data tables: BidiMirroring.txt, CaseFolding.txt, +# DerivedBidiClass.txt, DerivedCoreProperties.txt, DerivedGeneralCategory.txt, +# GraphemeBreakProperty.txt, PropList.txt, PropertyAliases.txt, +# PropertyValueAliases.txt, ScriptExtensions.txt, Scripts.txt, and +# emoji-data.txt. These must be in the Unicode.tables subdirectory. +# +# The emoji-data.txt file is found in the "emoji" subdirectory even though it +# is technically part of a different (but coordinated) standard as shown +# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"), +# for example: +# +# http://unicode.org/Public/emoji/13.0/ReadMe.txt +# +# DerivedBidiClass.txt and DerivedGeneralCategory.txt are in the "extracted" +# subdirectory of the Unicode database (UCD) on the Unicode web site; +# GraphemeBreakProperty.txt is in the "auxiliary" subdirectory. The other files +# are in the top-level UCD directory. +# +# ----------------------------------------------------------------------------- +# Minor modifications made to the original script: +# Added #! 
line at start +# Removed tabs +# Made it work with Python 2.4 by rewriting two statements that needed 2.5 +# Consequent code tidy +# Adjusted data file names to take from the Unicode.tables directory +# Adjusted global table names by prefixing _pcre_. +# Commented out stuff relating to the casefolding table, which isn't used; +# removed completely in 2012. +# Corrected size calculation +# Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed. +# Update for PCRE2: name changes, and SUPPORT_UCP is abolished. +# +# Major modifications made to the original script: +# Added code to add a grapheme break property field to records. +# +# Added code to search for sets of more than two characters that must match +# each other caselessly. A new table is output containing these sets, and +# offsets into the table are added to the main output records. This new +# code scans CaseFolding.txt instead of UnicodeData.txt, which is no longer +# used. +# +# Update for Python3: +# . Processed with 2to3, but that didn't fix everything +# . Changed string.strip to str.strip +# . Added encoding='utf-8' to the open() call +# . Inserted 'int' before blocksize/ELEMS_PER_LINE because an int is +# required and the result of the division is a float +# +# Added code to scan the emoji-data.txt file to find the Extended Pictographic +# property, which is used by PCRE2 as a grapheme breaking property. This was +# done when updating to Unicode 11.0.0 (July 2018). +# +# Added code to add a Script Extensions field to records. This has increased +# their size from 8 to 12 bytes, only 10 of which are currently used. +# +# Added code to add a bidi class field to records by scanning the +# DerivedBidiClass.txt and PropList.txt files. This uses one of the two spare +# bytes, so now 11 out of 12 are in use. 
+# +# 01-March-2010: Updated list of scripts for Unicode 5.2.0 +# 30-April-2011: Updated list of scripts for Unicode 6.0.0 +# July-2012: Updated list of scripts for Unicode 6.1.0 +# 20-August-2012: Added scan of GraphemeBreakProperty.txt and added a new +# field in the record to hold the value. Luckily, the +# structure had a hole in it, so the resulting table is +# not much bigger than before. +# 18-September-2012: Added code for multiple caseless sets. This uses the +# final hole in the structure. +# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0 +# 13-May-2014: Updated for PCRE2 +# 03-June-2014: Updated for Python 3 +# 20-June-2014: Updated for Unicode 7.0.0 +# 12-August-2014: Updated to put Unicode version into the file +# 19-June-2015: Updated for Unicode 8.0.0 +# 02-July-2017: Updated for Unicode 10.0.0 +# 03-July-2018: Updated for Unicode 11.0.0 +# 07-July-2018: Added code to scan emoji-data.txt for the Extended +# Pictographic property. +# 01-October-2018: Added the 'Unknown' script name +# 03-October-2018: Added new field for Script Extensions +# 27-July-2019: Updated for Unicode 12.1.0 +# 10-March-2020: Updated for Unicode 13.0.0 +# PCRE2-10.39: Updated for Unicode 14.0.0 +# 05-December-2021: Added code to scan DerivedBidiClass.txt for bidi class, +# and also PropList.txt for the Bidi_Control property +# 19-December-2021: Reworked script extensions lists to be bit maps instead +# of zero-terminated lists of script numbers. +# ---------------------------------------------------------------------------- +# +# Changes to the refactored script: +# +# 26-December-2021: Refactoring completed +# 10-January-2022: Addition of general Boolean property support +# 12-January-2022: Merge scriptx and bidiclass fields +# 14-January-2022: Enlarge Boolean property offset to 12 bits +# 28-January-2023: Remove ASCII "other case" from non-ASCII character that +# are present in caseless sets. 
+# +# ---------------------------------------------------------------------------- +# +# +# The main tables generated by this script are used by macros defined in +# pcre2_internal.h. They look up Unicode character properties using short +# sequences of code that contains no branches, which makes for greater speed. +# +# Conceptually, there is a table of records (of type ucd_record), one for each +# Unicode character. Each record contains the script number, script extension +# value, character type, grapheme break type, offset to caseless matching set, +# offset to the character's other case, the bidi class, and offset to bitmap of +# Boolean properties. +# +# A real table covering all Unicode characters would be far too big. It can be +# efficiently compressed by observing that many characters have the same +# record, and many blocks of characters (taking 128 characters in a block) have +# the same set of records as other blocks. This leads to a 2-stage lookup +# process. +# +# This script constructs seven tables. The ucd_caseless_sets table contains +# lists of characters that all match each other caselessly. Each list is +# in order, and is terminated by NOTACHAR (0xffffffff), which is larger than +# any valid character. The first list is empty; this is used for characters +# that are not part of any list. +# +# The ucd_digit_sets table contains the code points of the '9' characters in +# each set of 10 decimal digits in Unicode. This is used to ensure that digits +# in script runs all come from the same set. The first element in the vector +# contains the number of subsequent elements, which are in ascending order. +# +# Scripts are partitioned into two groups. Scripts that appear in at least one +# character's script extension list come first, followed by "Unknown" and then +# all the rest. This sorting is done automatically in the GenerateCommon.py +# script. A script's number is its index in the script_names list. 
+# +# The ucd_script_sets table contains bitmaps that represent lists of scripts +# for Script Extensions properties. Each bitmap consists of a fixed number of +# unsigned 32-bit numbers, enough to allocate a bit for every script that is +# used in any character's extension list, that is, enough for every script +# whose number is less than ucp_Unknown. A character's script extension value +# in its ucd record is an offset into the ucd_script_sets vector. The first +# bitmap has no bits set; characters that have no script extensions have zero +# as their script extensions value so that they use this map. +# +# The ucd_boolprop_sets table contains bitmaps that represent lists of Boolean +# properties. Each bitmap consists of a fixed number of unsigned 32-bit +# numbers, enough to allocate a bit for each supported Boolean property. +# +# The ucd_records table contains one instance of every unique character record +# that is required. The ucd_stage1 table is indexed by a character's block +# number, which is the character's code point divided by 128, since 128 is the +# size of each block. The result of a lookup in ucd_stage1 is a "virtual" block +# number. +# +# The ucd_stage2 table is a table of "virtual" blocks; each block is indexed by +# the offset of a character within its own block, and the result is the index +# number of the required record in the ucd_records vector. +# +# The following examples are correct for the Unicode 14.0.0 database. Future +# updates may change the actual lookup values. 
+# +# Example: lowercase "a" (U+0061) is in block 0 +# lookup 0 in stage1 table yields 0 +# lookup 97 (0x61) in the first table in stage2 yields 35 +# record 35 is { 0, 5, 12, 0, -32, 18432, 44 } +# 0 = ucp_Latin => Latin script +# 5 = ucp_Ll => Lower case letter +# 12 = ucp_gbOther => Grapheme break property "Other" +# 0 => Not part of a caseless set +# -32 (-0x20) => Other case is U+0041 +# 18432 = 0x4800 => Combined Bidi class + script extension values +# 44 => Offset to Boolean properties +# +# The top 5 bits of the sixth field are the Bidi class, with the rest being the +# script extension value, giving: +# +# 9 = ucp_bidiL => Bidi class left-to-right +# 0 => No special script extension property +# +# Almost all lowercase latin characters resolve to the same record. One or two +# are different because they are part of a multi-character caseless set (for +# example, k, K and the Kelvin symbol are such a set). +# +# Example: hiragana letter A (U+3042) is in block 96 (0x60) +# lookup 96 in stage1 table yields 93 +# lookup 66 (0x42) in table 93 in stage2 yields 819 +# record 819 is { 20, 7, 12, 0, 0, 18432, 82 } +# 20 = ucp_Hiragana => Hiragana script +# 7 = ucp_Lo => Other letter +# 12 = ucp_gbOther => Grapheme break property "Other" +# 0 => Not part of a caseless set +# 0 => No other case +# 18432 = 0x4800 => Combined Bidi class + script extension values +# 82 => Offset to Boolean properties +# +# The top 5 bits of the sixth field are the Bidi class, with the rest being the +# script extension value, giving: +# +# 9 = ucp_bidiL => Bidi class left-to-right +# 0 => No special script extension property +# +# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39) +# lookup 57 in stage1 table yields 55 +# lookup 80 (0x50) in table 55 in stage2 yields 621 +# record 621 is { 84, 12, 3, 0, 0, 26762, 96 } +# 84 = ucp_Inherited => Script inherited from predecessor +# 12 = ucp_Mn => Non-spacing mark +# 3 = ucp_gbExtend => Grapheme break property "Extend" +# 0 => Not 
part of a caseless set +# 0 => No other case +# 26762 = 0x688A => Combined Bidi class + script extension values +# 96 => Offset to Boolean properties +# +# The top 5 bits of the sixth field are the Bidi class, with the rest being the +# script extension value, giving: +# +# 13 = ucp_bidiNSM => Bidi class non-spacing mark +# 138 => Script Extension list offset = 138 +# +# At offset 138 in the ucd_script_sets vector we find a bitmap with bits 1, 8, +# 18, and 47 set. This means that this character is expected to be used with +# any of those scripts, which are Bengali, Devanagari, Kannada, and Grantha. +# +# Philip Hazel, last updated 14 January 2022. +############################################################################## + + +# Import standard modules + +import re +import string +import sys + +# Import common data lists and functions + +from GenerateCommon import \ + bidi_classes, \ + bool_properties, \ + bool_propsfiles, \ + bool_props_list_item_size, \ + break_properties, \ + category_names, \ + general_category_names, \ + script_abbrevs, \ + script_list_item_size, \ + script_names, \ + open_output + +# Some general parameters + +MAX_LIST = 8 # keep in sync with the value in pcre2_auto_possess.c +MAX_UNICODE = 0x110000 +NOTACHAR = 0xffffffff + + +# --------------------------------------------------------------------------- +# DEFINE FUNCTIONS +# --------------------------------------------------------------------------- + + +# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt + +def make_get_names(enum): + return lambda chardata: enum.index(chardata[1]) + + +# Parse a line of DerivedBidiClass.txt + +def get_bidi(chardata): + if len(chardata[1]) > 3: + return bidi_classes_long.index(chardata[1]) + else: + return bidi_classes_short.index(chardata[1]) + + +# Parse a line of CaseFolding.txt + +def get_other_case(chardata): + if chardata[1] == 'C' or chardata[1] == 'S': + return int(chardata[2], 16) - int(chardata[0], 16) + 
return None + + +# Parse a line of ScriptExtensions.txt + +def get_script_extension(chardata): + script_extension = tuple(script_abbrevs.index(abbrev) for abbrev in chardata[1].split(' ')) + + try: + index = script_lists.index(script_extension) + except ValueError: + index = len(script_lists) + script_lists.append(script_extension) + + return index * script_list_item_size + + +# Read a whole table in memory, setting/checking the Unicode version + +def read_table(file_name, get_value, default_value): + global unicode_version + + f = re.match(r'^[^/]+/([^.]+)\.txt$', file_name) + file_base = f.group(1) + version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$" + file = open(file_name, 'r', encoding='utf-8') + f = re.match(version_pat, file.readline()) + version = f.group(1) + if unicode_version == "": + unicode_version = version + elif unicode_version != version: + print("WARNING: Unicode version differs in %s" % file_name, file=sys.stderr) + + table = [default_value] * MAX_UNICODE + for line in file: + if file_base == 'DerivedBidiClass': + line = re.sub(r'# @missing: ', '', line) + + line = re.sub(r'#.*', '', line) + chardata = list(map(str.strip, line.split(';'))) + if len(chardata) <= 1: + continue + value = get_value(chardata) + if value is None: + continue + m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) + char = int(m.group(1), 16) + if m.group(3) is None: + last = char + else: + last = int(m.group(3), 16) + for i in range(char, last + 1): + if file_base == 'CaseFolding' and table[i] != default_value: + print("WARNING: multiple rules for other_case[0x{:X}]".format(i)) + table[i] = value + + file.close() + return table + + +# Get the smallest possible C language type for the values in a table + +def get_type_size(table): + type_size = [("uint8_t", 1), ("uint16_t", 2), ("uint32_t", 4), + ("signed char", 1), ("int16_t", 2), ("int32_t", 4)] + limits = [(0, 255), (0, 65535), (0, 4294967295), (-128, 127), + (-32768, 32767), 
(-2147483648, 2147483647)] + minval = min(table) + maxval = max(table) + for num, (minlimit, maxlimit) in enumerate(limits): + if minlimit <= minval and maxval <= maxlimit: + return type_size[num] + raise OverflowError("Too large to fit into C types") + + +# Get the total size of a list of tables + +def get_tables_size(*tables): + total_size = 0 + for table in tables: + type, size = get_type_size(table) + total_size += size * len(table) + return total_size + + +# Compress a table into the two stages + +def compress_table(table, block_size): + blocks = {} # Dictionary for finding identical blocks + stage1 = [] # Stage 1 table contains block numbers (indices into stage 2 table) + stage2 = [] # Stage 2 table contains the blocks with property values + table = tuple(table) + for i in range(0, len(table), block_size): + block = table[i:i+block_size] + start = blocks.get(block) + if start is None: + # Allocate a new block + start = len(stage2) // block_size + stage2 += block + blocks[block] = start + stage1.append(start) + return stage1, stage2 + + +# Output a table + +def write_table(table, table_name, block_size = None): + type, size = get_type_size(table) + ELEMS_PER_LINE = 16 + + s = "const %s %s[] = { /* %d bytes" % (type, table_name, size * len(table)) + if block_size: + s += ", block = %d" % block_size + f.write(s + " */\n") + table = tuple(table) + if block_size is None: + fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */\n" + mult = MAX_UNICODE / len(table) + for i in range(0, len(table), ELEMS_PER_LINE): + f.write(fmt % (table[i:i+ELEMS_PER_LINE] + (int(i * mult),))) + else: + if block_size > ELEMS_PER_LINE: + el = ELEMS_PER_LINE + else: + el = block_size + fmt = "%3d," * el + "\n" + if block_size > ELEMS_PER_LINE: + fmt = fmt * int(block_size / ELEMS_PER_LINE) + for i in range(0, len(table), block_size): + f.write(("\n/* block %d */\n" + fmt) % ((i // block_size,) + table[i:i+block_size])) + f.write("};\n\n") + + +# Extract the unique combinations of properties into 
records + +def combine_tables(*tables): + records = {} + index = [] + for t in zip(*tables): + i = records.get(t) + if i is None: + i = records[t] = len(records) + index.append(i) + return index, records + + +# Create a record struct + +def get_record_size_struct(records): + size = 0 + structure = 'typedef struct {\n' + for i in range(len(records[0])): + record_slice = [record[i] for record in records] + slice_type, slice_size = get_type_size(record_slice) + # add padding: round up to the nearest power of slice_size + size = (size + slice_size - 1) & -slice_size + size += slice_size + structure += '%s property_%d;\n' % (slice_type, i) + + # round up to the first item of the next structure in array + record_slice = [record[0] for record in records] + slice_type, slice_size = get_type_size(record_slice) + size = (size + slice_size - 1) & -slice_size + + structure += '} ucd_record;\n*/\n' + return size, structure + + +# Write records + +def write_records(records, record_size): + f.write('const ucd_record PRIV(ucd_records)[] = { ' + \ + '/* %d bytes, record size %d */\n' % (len(records) * record_size, record_size)) + records = list(zip(list(records.keys()), list(records.values()))) + records.sort(key = lambda x: x[1]) + for i, record in enumerate(records): + f.write((' {' + '%6d, ' * len(record[0]) + '}, /* %3d */\n') % (record[0] + (i,))) + f.write('};\n\n') + + +# Write a bit set + +def write_bitsets(list, item_size): + for d in list: + bitwords = [0] * item_size + for idx in d: + bitwords[idx // 32] |= 1 << (idx & 31) + s = " " + for x in bitwords: + f.write("%s" % s) + s = ", " + f.write("0x%08xu" % x) + f.write(",\n") + f.write("};\n\n") + + +# --------------------------------------------------------------------------- +# This bit of code must have been useful when the original script was being +# developed. Retain it just in case it is ever needed again. 
+ +# def test_record_size(): +# tests = [ \ +# ( [(3,), (6,), (6,), (1,)], 1 ), \ +# ( [(300,), (600,), (600,), (100,)], 2 ), \ +# ( [(25, 3), (6, 6), (34, 6), (68, 1)], 2 ), \ +# ( [(300, 3), (6, 6), (340, 6), (690, 1)], 4 ), \ +# ( [(3, 300), (6, 6), (6, 340), (1, 690)], 4 ), \ +# ( [(300, 300), (6, 6), (6, 340), (1, 690)], 4 ), \ +# ( [(3, 100000), (6, 6), (6, 123456), (1, 690)], 8 ), \ +# ( [(100000, 300), (6, 6), (123456, 6), (1, 690)], 8 ), \ +# ] +# for test in tests: +# size, struct = get_record_size_struct(test[0]) +# assert(size == test[1]) +# test_record_size() +# --------------------------------------------------------------------------- + + + +# --------------------------------------------------------------------------- +# MAIN CODE FOR CREATING TABLES +# --------------------------------------------------------------------------- + +unicode_version = "" + +# Some of the tables imported from GenerateCommon.py have alternate comment +# strings for use by GenerateUcpHeader. The comments are not wanted here, so +# remove them. + +bidi_classes_short = bidi_classes[::2] +bidi_classes_long = bidi_classes[1::2] +break_properties = break_properties[::2] +category_names = category_names[::2] + +# Create the various tables from Unicode data files + +script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Unknown')) +category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn')) +break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_properties), break_properties.index('Other')) +other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0) +bidi_class = read_table('Unicode.tables/DerivedBidiClass.txt', get_bidi, bidi_classes_short.index('L')) + +# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now +# we need to find the Extended_Pictographic property for emoji characters. 
This +# can be set as an additional grapheme break property, because the default for +# all the emojis is "other". We scan the emoji-data.txt file and modify the +# break-props table. + +file = open('Unicode.tables/emoji-data.txt', 'r', encoding='utf-8') +for line in file: + line = re.sub(r'#.*', '', line) + chardata = list(map(str.strip, line.split(';'))) + if len(chardata) <= 1: + continue + if chardata[1] != "Extended_Pictographic": + continue + m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) + char = int(m.group(1), 16) + if m.group(3) is None: + last = char + else: + last = int(m.group(3), 16) + for i in range(char, last + 1): + if break_props[i] != break_properties.index('Other'): + print("WARNING: Emoji 0x%x has break property %s, not 'Other'" % + (i, break_properties[break_props[i]]), file=sys.stderr) + break_props[i] = break_properties.index('Extended_Pictographic') +file.close() + +# Handle script extensions. The get_script_extension() function maintains a +# list of unique bitmaps representing lists of scripts, returning the offset +# in that list. Initialize the list with an empty set, which is used for +# characters that have no script extensions. + +script_lists = [[]] +scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0) + +for idx in range(len(scriptx_bidi_class)): + scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11) +bidi_class = None + +# Find the Boolean properties of each character. This next bit of magic creates +# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to +# the *same* list, which is not what we want. 
+ +bprops = [[] for _ in range(MAX_UNICODE)] + +# Collect the properties from the various files + +for filename in bool_propsfiles: + try: + file = open('Unicode.tables/' + filename, 'r') + except IOError: + print(f"** Couldn't open {'Unicode.tables/' + filename}\n") + sys.exit(1) + + for line in file: + line = re.sub(r'#.*', '', line) + data = list(map(str.strip, line.split(';'))) + if len(data) <= 1: + continue + + try: + ix = bool_properties.index(data[1]) + except ValueError: + continue + + m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', data[0]) + char = int(m.group(1), 16) + if m.group(3) is None: + last = char + else: + last = int(m.group(3), 16) + + for i in range(char, last + 1): + bprops[i].append(ix) + + file.close() + +# The ASCII property isn't listed in any files, but it is easy enough to add +# it manually. + +ix = bool_properties.index("ASCII") +for i in range(128): + bprops[i].append(ix) + +# The Bidi_Mirrored property isn't listed in any property files. We have to +# deduce it from the file that lists the mirrored characters. + +ix = bool_properties.index("Bidi_Mirrored") + +try: + file = open('Unicode.tables/BidiMirroring.txt', 'r') +except IOError: + print(f"** Couldn't open {'Unicode.tables/BidiMirroring.txt'}\n") + sys.exit(1) + +for line in file: + line = re.sub(r'#.*', '', line) + data = list(map(str.strip, line.split(';'))) + if len(data) <= 1: + continue + c = int(data[0], 16) + bprops[c].append(ix) + +file.close() + +# Scan each character's boolean property list and created a list of unique +# lists, at the same time, setting the index in that list for each property in +# the bool_props vector. 
+ +bool_props = [0] * MAX_UNICODE +bool_props_lists = [[]] + +for c in range(MAX_UNICODE): + s = set(bprops[c]) + for i in range(len(bool_props_lists)): + if s == set(bool_props_lists[i]): + break + else: + bool_props_lists.append(bprops[c]) + i += 1 + + bool_props[c] = i * bool_props_list_item_size + +# This block of code was added by PH in September 2012. It scans the other_case +# table to find sets of more than two characters that must all match each other +# caselessly. Later in this script a table of these sets is written out. +# However, we have to do this work here in order to compute the offsets in the +# table that are inserted into the main table. + +# The CaseFolding.txt file lists pairs, but the common logic for reading data +# sets only one value, so first we go through the table and set "return" +# offsets for those that are not already set. + +for c in range(MAX_UNICODE): + if other_case[c] != 0 and other_case[c + other_case[c]] == 0: + other_case[c + other_case[c]] = -other_case[c] + +# Now scan again and create equivalence sets. + +caseless_sets = [] + +for c in range(MAX_UNICODE): + o = c + other_case[c] + + # Trigger when this character's other case does not point back here. We + # now have three characters that are case-equivalent. + + if other_case[o] != -other_case[c]: + t = o + other_case[o] + + # Scan the existing sets to see if any of the three characters are already + # part of a set. If so, unite the existing set with the new set. + + appended = 0 + for s in caseless_sets: + found = 0 + for x in s: + if x == c or x == o or x == t: + found = 1 + + # Add new characters to an existing set + # TODO: make sure the data doesn't overflow a list[] + + if found: + found = 0 + for y in [c, o, t]: + for x in s: + if x == y: + found = 1 + if not found: + s.append(y) + appended = 1 + + # If we have not added to an existing set, create a new one. + + if not appended: + caseless_sets.append([c, o, t]) + +# End of loop looking for caseless sets. 
+ +# Now scan the sets and set appropriate offsets for the characters. + +caseless_offsets = [0] * MAX_UNICODE + +offset = 1 +for s in caseless_sets: + for x in s: + caseless_offsets[x] = offset + offset += len(s) + 1 + +# End of block of code for creating offsets for caseless matching sets. + +# Scan the caseless sets, and for any non-ASCII character that has an ASCII +# character as its "base" other case, remove the other case. This makes it +# easier to handle those characters when the PCRE2 option for not mixing ASCII +# and non-ASCII is enabled. In principle one should perhaps scan for a +# non-ASCII alternative, but in practice these don't exist. + +for s in caseless_sets: + for x in s: + if x > 127 and x + other_case[x] < 128: + other_case[x] = 0 + +# Append a couple of extra caseless sets (unreferenced by the record objects) +# to hold the optional Turkish case equivalences. +turkish_dotted_i_index = offset +caseless_sets.append([0x69, 0x0130]) +caseless_sets.append([0x49, 0x0131]) + +# Combine all the tables + +table, records = combine_tables(script, category, break_props, + caseless_offsets, other_case, scriptx_bidi_class, bool_props) + +# Find the record size and create a string definition of the structure for +# outputting as a comment. 
+ +record_size, record_struct = get_record_size_struct(list(records.keys())) + +# Find the optimum block size for the two-stage table + +min_size = sys.maxsize +for block_size in [2 ** i for i in range(5,10)]: + size = len(records) * record_size + stage1, stage2 = compress_table(table, block_size) + size += get_tables_size(stage1, stage2) + #print("/* block size {:3d} => {:5d} bytes */".format(block_size, size)) + if size < min_size: + min_size = size + min_stage1, min_stage2 = stage1, stage2 + min_block_size = block_size + + +# --------------------------------------------------------------------------- +# MAIN CODE FOR WRITING THE OUTPUT FILE +# --------------------------------------------------------------------------- + +# Open the output file (no return on failure). This call also writes standard +# header boilerplate. + +f = open_output("pcre2_ucd.c") + +# Output this file's heading text + +f.write("""\ +/* This file contains tables of Unicode properties that are extracted from +Unicode data files. See the comments at the start of maint/GenerateUcd.py for +details. + +As well as being part of the PCRE2 library, this file is #included by the +pcre2test program, which redefines the PRIV macro to change table names from +_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present, +just one of these tables is actually needed. When compiling the library, some +headers are needed. */ + +#ifndef PCRE2_PCRE2TEST +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "pcre2_internal.h" +#endif /* PCRE2_PCRE2TEST */ + +/* The tables herein are needed only when UCP support is built, and in PCRE2 +that happens automatically with UTF support. This module should not be +referenced otherwise, so it should not matter whether it is compiled or not. +However a comment was received about space saving - maybe the guy linked all +the modules rather than using a library - so we include a condition to cut out +the tables when not needed. 
But don't leave a totally empty module because some +compilers barf at that. Instead, just supply some small dummy tables. */ + +#ifndef SUPPORT_UNICODE +const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}}; +const uint16_t PRIV(ucd_stage1)[] = {0}; +const uint16_t PRIV(ucd_stage2)[] = {0}; +const uint32_t PRIV(ucd_caseless_sets)[] = {0}; +const uint32_t PRIV(ucd_nocase_ranges)[] = {0}; +const uint32_t PRIV(ucd_nocase_ranges_size) = 0; +#else +\n""") + +# --- Output some variable heading stuff --- + +f.write("/* Total size: %d bytes, block size: %d. */\n\n" % (min_size, min_block_size)) +f.write('const char *PRIV(unicode_version) = "{}";\n\n'.format(unicode_version)) + +f.write("""\ +/* When recompiling tables with a new Unicode version, please check the types +in this structure definition with those in pcre2_internal.h (the actual field +names will be different). +\n""") + +f.write(record_struct) + +f.write(""" +/* If the 32-bit library is run in non-32-bit mode, character values greater +than 0x10ffff may be encountered. For these we set up a special record. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +const ucd_record PRIV(dummy_ucd_record)[] = {{ + ucp_Unknown, /* script */ + ucp_Cn, /* type unassigned */ + ucp_gbOther, /* grapheme break property */ + 0, /* case set */ + 0, /* other case */ + 0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */ + 0, /* bool properties offset */ + }}; +#endif +\n""") + +# --- Output the table of caseless character sets --- + +f.write("""\ +/* This table contains lists of characters that are caseless sets of +more than one character. Each list is terminated by NOTACHAR. 
*/ + +const uint32_t PRIV(ucd_caseless_sets)[] = { + NOTACHAR, +""") + +for s in caseless_sets: + s = sorted(s) + for x in s: + f.write(' 0x%04x,' % x) + f.write(' NOTACHAR,\n') +f.write('};\n\n') + +# --- Output the indices of the Turkish caseless character sets --- + +f.write("""\ +/* This is the index, within ucd_caseless_sets, of the additional +Turkish case-equivalences. The dotted I ones are this offset; the +dotless I are +3 from here. */ + +const uint32_t PRIV(ucd_turkish_dotted_i_caseset) = %d; + +""" % (turkish_dotted_i_index)) + +# --- Other tables are not needed by pcre2test --- + +f.write("""\ +/* When #included in pcre2test, we don't need the table of digit sets, nor the +the large main UCD tables. */ + +#ifndef PCRE2_PCRE2TEST +\n""") + +# --- Output the nocase sets --- + +f.write("""\ +/* This table contains character ranges, where the characters in the range have +no other case. Both start and end values are excluded from the range. */ + +const uint32_t PRIV(ucd_nocase_ranges)[] = { +""") + +range_start = 0 +size = 0 +# The range size is bigger than eight characters. +expected_size = 8 +total = 0 + +for c in range(1, MAX_UNICODE): + if other_case[c] != 0 or c in [0x0130, 0x0131]: # add the two chars that gain casing in Turkish + if c - range_start > expected_size: + range_size = c - range_start - 1 + f.write(' 0x%04x, 0x%04x, /* %d */\n' % (range_start, c, range_size)) + total += range_size + size += 2 + range_start = c + +# The else case is unlikely +if other_case[MAX_UNICODE - 1] == 0 and MAX_UNICODE - range_start > expected_size: + range_size = MAX_UNICODE - range_start - 1 + f.write(' 0x%04x, 0x%04x, /* %d */\n' % (range_start, MAX_UNICODE, range_size)) + total += range_size + size += 2 + +f.write(' 0xffffffff, 0xffffffff /* terminator */\n};\n\n'); +f.write('/* Total: %d characters. */\nconst uint32_t PRIV(ucd_nocase_ranges_size) = %d;\n\n' % (total, size)) + +# --- Read Scripts.txt again for the sets of 10 digits. 
--- + +digitsets = [] +file = open('Unicode.tables/Scripts.txt', 'r', encoding='utf-8') + +for line in file: + m = re.match(r'([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s+;\s+\S+\s+#\s+Nd\s+', line) + if m is None: + continue + first = int(m.group(1),16) + last = int(m.group(2),16) + if ((last - first + 1) % 10) != 0: + f.write("ERROR: %04x..%04x does not contain a multiple of 10 characters" % (first, last), + file=sys.stderr) + while first < last: + digitsets.append(first + 9) + first += 10 +file.close() +digitsets.sort() + +f.write("""\ +/* This table lists the code points for the '9' characters in each set of +decimal digits. It is used to ensure that all the digits in a script run come +from the same set. */ + +const uint32_t PRIV(ucd_digit_sets)[] = { +""") + +f.write(" %d, /* Number of subsequent values */" % len(digitsets)) +count = 8 +for d in digitsets: + if count == 8: + f.write("\n ") + count = 0 + f.write(" 0x%05x," % d) + count += 1 +f.write("\n};\n\n") + +f.write("""\ +/* This vector is a list of script bitsets for the Script Extension property. +The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as +ucd_script_sets_item_size. */ + +const uint32_t PRIV(ucd_script_sets)[] = { +""") +write_bitsets(script_lists, script_list_item_size) + +f.write("""\ +/* This vector is a list of bitsets for Boolean properties. The number of +32_bit words in each bitset is #defined as ucd_boolprop_sets_item_size in +pcre2_ucp.h. */ + +const uint32_t PRIV(ucd_boolprop_sets)[] = { +""") +write_bitsets(bool_props_lists, bool_props_list_item_size) + + +# Output the main UCD tables. + +f.write("""\ +/* These are the main two-stage UCD tables. 
The fields in each record are: +script (8 bits), character type (8 bits), grapheme break property (8 bits), +offset to multichar other cases or zero (8 bits), offset to other case or zero +(32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed +into a 16-bit field, and offset in binary properties table (16 bits). */ +\n""") + +write_records(records, record_size) +write_table(min_stage1, 'PRIV(ucd_stage1)') +write_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size) + +f.write("#if UCD_BLOCK_SIZE != %d\n" % min_block_size) +f.write("""\ +#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h +#endif +#endif /* SUPPORT_UNICODE */ + +#endif /* PCRE2_PCRE2TEST */ + +/* End of pcre2_ucd.c */ +""") + +f.close() + +# End diff --git a/3rd/pcre2/maint/GenerateUcpHeader.py b/3rd/pcre2/maint/GenerateUcpHeader.py new file mode 100644 index 00000000..425678b4 --- /dev/null +++ b/3rd/pcre2/maint/GenerateUcpHeader.py @@ -0,0 +1,98 @@ +#! /usr/bin/env python3 + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ + +# This script generates the pcre2_ucp.h file from Unicode data files. This +# header uses enumerations to give names to Unicode property types and script +# names. + +# This script was created in December 2021 as part of the Unicode data +# generation refactoring. + + +# Import common data lists and functions + +from GenerateCommon import \ + bidi_classes, \ + bool_properties, \ + bool_props_list_item_size, \ + break_properties, \ + category_names, \ + general_category_names, \ + script_list_item_size, \ + script_names, \ + open_output + +# Open the output file (no return on failure). This call also writes standard +# header boilerplate. 
+ +f = open_output("pcre2_ucp.h") + +# Output this file's heading text + +f.write("""\ +#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD +#define PCRE2_UCP_H_IDEMPOTENT_GUARD + +/* This file contains definitions of the Unicode property values that are +returned by the UCD access macros and used throughout PCRE2. + +IMPORTANT: The specific values of the first two enums (general and particular +character categories) are assumed by the table called catposstab in the file +pcre2_auto_possess.c. They are unlikely to change, but should be checked after +an update. */ +\n""") + +f.write("/* These are the general character categories. */\n\nenum {\n") +for i in general_category_names: + f.write(" ucp_%s,\n" % i) +f.write("};\n\n") + +f.write("/* These are the particular character categories. */\n\nenum {\n") +for i in range(0, len(category_names), 2): + f.write(" ucp_%s, /* %s */\n" % (category_names[i], category_names[i+1])) +f.write("};\n\n") + +f.write("/* These are Boolean properties. */\n\nenum {\n") +for i in bool_properties: + f.write(" ucp_%s,\n" % i) + +f.write(" /* This must be last */\n") +f.write(" ucp_Bprop_Count\n};\n\n") + +f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n") +f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size) + +f.write("/* These are the bidi class values. */\n\nenum {\n") +for i in range(0, len(bidi_classes), 2): + sp = ' ' * (4 - len(bidi_classes[i])) + f.write(" ucp_bidi%s,%s /* %s */\n" % (bidi_classes[i], sp, bidi_classes[i+1])) +f.write("};\n\n") + +f.write("/* These are grapheme break properties. The Extended Pictographic " + "property\ncomes from the emoji-data.txt file. */\n\nenum {\n") +for i in range(0, len(break_properties), 2): + sp = ' ' * (21 - len(break_properties[i])) + f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1])) +f.write("};\n\n") + +f.write("/* These are the script identifications. */\n\nenum {\n /* Scripts which has characters in other scripts. 
*/\n") +for i in script_names: + if i == "Unknown": + f.write("\n /* Scripts which has no characters in other scripts. */\n") + f.write(" ucp_%s,\n" % i) +f.write("\n") + +f.write(" /* This must be last */\n") +f.write(" ucp_Script_Count\n};\n\n") + +f.write("/* Size of entries in ucd_script_sets[] */\n\n") +f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size) + +f.write("#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n") +f.write("/* End of pcre2_ucp.h */\n") + +f.close() + +# End diff --git a/3rd/pcre2/maint/GenerateUcpTables.py b/3rd/pcre2/maint/GenerateUcpTables.py new file mode 100644 index 00000000..38119417 --- /dev/null +++ b/3rd/pcre2/maint/GenerateUcpTables.py @@ -0,0 +1,203 @@ +#! /usr/bin/env python3 + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ + +# This script generates the pcre2_ucptables.c file, which contains tables for +# recognizing Unicode property names. It is #included by pcre2_tables.c. In +# order to reduce the number of relocations when loading the PCRE2 library, the +# names are held as a single large string, with offsets in the table. This is +# tedious to maintain by hand. Therefore, a script is used to generate the +# table. + +# This script was created in December 2021 based on the previous GenerateUtt +# script, whose output had to be manually edited into pcre2_tables.c. Here is +# the history of the original script: + +# ----------------------------------------------------------------------------- +# Modified by PH 17-March-2009 to generate the more verbose form that works +# for UTF-support in EBCDIC as well as ASCII environments. +# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0. +# Modified by PH 04-May-2010 to add new "X.." special categories. +# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0 +# Modified by ChPe 30-September-2012 to add this note; no other changes were +# necessary for Unicode 6.2.0 support. 
+# Modified by PH 26-February-2013 to add the Xuc special category. +# Comment modified by PH 13-May-2014 to update to PCRE2 file names. +# Script updated to Python 3 by running it through the 2to3 converter. +# Added script names for Unicode 7.0.0, 20-June-2014. +# Added script names for Unicode 8.0.0, 19-June-2015. +# Added script names for Unicode 10.0.0, 02-July-2017. +# Added script names for Unicode 11.0.0, 03-July-2018. +# Added 'Unknown' script, 01-October-2018. +# Added script names for Unicode 12.1.0, 27-July-2019. +# Added script names for Unicode 13.0.0, 10-March-2020. +# Added Script names for Unicode 14.0.0, PCRE2-10.39 +# Added support for bidi class and bidi control, 06-December-2021 +# This also involved lower casing strings and removing underscores, in +# accordance with Unicode's "loose matching" rules, which Perl observes. +# Changed default script type from PT_SC to PT_SCX, 18-December-2021 +# ----------------------------------------------------------------------------- +# +# Note subsequent changes here: +# +# 27-December-2021: Added support for 4-letter script abbreviations. +# 10-January-2022: Further updates for Boolean property support +# ----------------------------------------------------------------------------- + + +# Import common data lists and functions + +from GenerateCommon import \ + abbreviations, \ + bool_properties, \ + bidi_classes, \ + category_names, \ + general_category_names, \ + script_names, \ + open_output + +# Open the output file (no return on failure). This call also writes standard +# header boilerplate. + +f = open_output("pcre2_ucptables.c") + +# The list in bidi_classes contains just the Unicode classes such as AN, LRE, +# etc., along with comments. We need to add "bidi" in front of each value, in +# order to create names that don't clash with other types of property.
+ +bidi_class_names = [] +for i in range(0, len(bidi_classes), 2): + bidi_class_names.append("bidi" + bidi_classes[i]) + +# Remove the comments from other lists that contain them. + +category_names = category_names[::2] + +# Create standardized versions of the names by lowercasing and removing +# underscores. + +def stdname(x): + return x.lower().replace('_', '') + +def stdnames(x): + y = [''] * len(x) + for i in range(len(x)): + y[i] = stdname(x[i]) + return y + +std_category_names = stdnames(category_names) +std_general_category_names = stdnames(general_category_names) +std_bidi_class_names = stdnames(bidi_class_names) +std_bool_properties = stdnames(bool_properties) + +# Create the table, starting with the Unicode script, category and bidi class +# names. We keep both the standardized name and the original, because the +# latter is used for the ucp_xx names. NOTE: for the script abbreviations, we +# still use the full original names. + +utt_table = [] + +scx_end = script_names.index('Unknown') + +for idx, name in enumerate(script_names): + pt_type = 'PT_SCX' if idx < scx_end else 'PT_SC' + utt_table.append((stdname(name), name, pt_type)) + for abbrev in abbreviations[name]: + utt_table.append((stdname(abbrev), name, pt_type)) + +# Add the remaining property lists + +utt_table += list(zip(std_category_names, category_names, ['PT_PC'] * len(category_names))) +utt_table += list(zip(std_general_category_names, general_category_names, ['PT_GC'] * len(general_category_names))) +utt_table += list(zip(std_bidi_class_names, bidi_class_names, ['PT_BIDICL'] * len(bidi_class_names))) + +for name in bool_properties: + utt_table.append((stdname(name), name, 'PT_BOOL')) + if name in abbreviations: + for abbrev in abbreviations[name]: + utt_table.append((stdname(abbrev), name, 'PT_BOOL')) + +# Now add specials and synonyms. Note both the standardized and capitalized +# forms are needed. 
+ +utt_table.append(('any', 'Any', 'PT_ANY')) +utt_table.append(('l&', 'L&', 'PT_LAMP')) +utt_table.append(('lc', 'LC', 'PT_LAMP')) +utt_table.append(('xan', 'Xan', 'PT_ALNUM')) +utt_table.append(('xps', 'Xps', 'PT_PXSPACE')) +utt_table.append(('xsp', 'Xsp', 'PT_SPACE')) +utt_table.append(('xuc', 'Xuc', 'PT_UCNC')) +utt_table.append(('xwd', 'Xwd', 'PT_WORD')) + +# Remove duplicates from the table and then sort it. + +utt_table = list(set(utt_table)) +utt_table.sort() + +# Output file-specific heading + +f.write("""\ +#ifdef SUPPORT_UNICODE + +/* The PRIV(utt)[] table below translates Unicode property names into type and +code values. It is searched by binary chop, so must be in collating sequence of +name. Originally, the table contained pointers to the name strings in the first +field of each entry. However, that leads to a large number of relocations when +a shared library is dynamically loaded. A significant reduction is made by +putting all the names into a single, large string and using offsets instead. +All letters are lower cased, and underscores are removed, in accordance with +the "loose matching" rules that Unicode advises and Perl uses. */ +\n""") + +# We have to use STR_ macros to define the strings so that it all works in +# UTF-8 mode on EBCDIC platforms. 
+ +for utt in utt_table: + f.write('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND'))) + for c in utt[0]: + if c == '&': + f.write(' STR_AMPERSAND') + else: + f.write(' STR_%s' % c); + f.write(' "\\0"\n') + +# Output the long string of concatenated names + +f.write('\nconst char PRIV(utt_names)[] =\n') +last = '' +for utt in utt_table: + if utt == utt_table[-1]: + last = ';' + f.write(' STRING_%s0%s\n' % (utt[0].replace('&', '_AMPERSAND'), last)) + +# Output the property type table + +f.write('\nconst ucp_type_table PRIV(utt)[] = {\n') +offset = 0 +last = ',' +for utt in utt_table: + if utt[2] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE', + 'PT_SPACE', 'PT_UCNC', 'PT_WORD'): + value = '0' + else: + value = 'ucp_' + utt[1] + if utt == utt_table[-1]: + last = '' + f.write(' { %3d, %s, %s }%s\n' % (offset, utt[2], value, last)) + offset += len(utt[0]) + 1 +f.write('};\n\n') + +# Ending text + +f.write("""\ +const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); + +#endif /* SUPPORT_UNICODE */ + +/* End of pcre2_ucptables.c */ +""") + +f.close + +# End diff --git a/3rd/pcre2/maint/ManyConfigTests b/3rd/pcre2/maint/ManyConfigTests new file mode 100644 index 00000000..c3a2b1a0 --- /dev/null +++ b/3rd/pcre2/maint/ManyConfigTests @@ -0,0 +1,480 @@ +#! /bin/sh + +# This is a script for the use of PCRE2 maintainers. It configures and builds +# PCRE2 with a variety of configuration options, and in each case runs the +# tests to ensure that all goes well. Every possible combination would take far +# too long, so we use a representative sample. This script should be run in the +# PCRE2 source directory. + +# While debugging, it is sometimes useful to be able to cut out some of the +# tests, in order to run those that are giving errors. 
The following options +# do this: +# +# -noasan skip the test that uses -fsanitize=address +# -nousan skip the test that uses -fsanitize=undefined +# -nodebug skip the test that uses --enable-debug +# -nojit skip all JIT tests +# -nojitmain skip non-valgrind JIT tests +# -nojitvalgrind skip JIT tests with valgrind +# -nomain skip all the main (non-JIT) set of tests +# -nomainvalgrind skip the main (non-JIT) valgrind tests +# -notmp skip the tests in a temporary directory +# -notmpjit skip the JIT test in a temporary directory +# -novalgrind skip all the valgrind tests + +# Alternatively, if any of those names are given with '+' instead of '-no', +# only those groups named with '+' are run (e.g. +jit). If -dummy is given, +# no tests are actually run - this provides a means of testing the selectors. + +# The -v option causes a call to 'pcre2test -C' to happen for each +# configuration. + +useasan=1 +useusan=1 +usedebug=1 +usejit=1 +usejitvalgrind=1 +usemain=1 +usemainvalgrind=1 +usetmp=1 +usetmpjit=1 +usevalgrind=1 + +dummy=0 +seenplus=0 +verbose=0 + +while [ $# -gt 0 ] ; do + case $1 in + +*) if [ $seenplus -eq 0 ]; then + useasan=0 + useusan=0 + usedebug=0 + usejit=0 + usejitvalgrind=0 + usemain=0 + usemainvalgrind=0 + usetmp=0 + usetmpjit=0 + usevalgrind=0 + seenplus=1 + fi;; + esac + + case $1 in + -dummy) dummy=1;; + -v) verbose=1;; + -noasan) useasan=0;; + -nousan) useusan=0;; + -nodebug) usedebug=0;; + -nojit) usejit=0; usejitvalgrind=0; usetmpjit=0;; + -nojitmain) usejit=0;; + -nojitvalgrind) usejitvalgrind=0;; + -nomain) usemain=0; usemainvalgrind=0;; + -nomainvalgrind) usemainvalgrind=0;; + -notmp) usetmp=0; usetmpjit=0;; + -notmpjit) usetmpjit=0;; + -novalgrind) usevalgrind=0;; + +asan) useasan=1;; + +usan) useusan=1;; + +debug) usedebug=1;; + +jit) usejit=1; usejitvalgrind=1; usetmpjit=1;; + +jitmain) usejit=1;; + +jitvalgrind) usejitvalgrind=1;; + +main) usemain=1; usemainvalgrind=1;; + +mainvalgrind) usemainvalgrind=1;; + +tmp) usetmp=1;; + +tmpjit) 
usetmpjit=1;; + +valgrind) usevalgrind=1; usejitvalgrind=1; usemainvalgrind=1;; + *) echo "Unknown option '$1'"; exit 1;; + esac + shift +done + +if [ $usejitvalgrind -eq 0 -a $usemainvalgrind -eq 0 ] ; then + usevalgrind=0 +fi + +# This is in case the caller has set aliases (as I do - PH) + +unset cp ls mv rm + +# This is a temporary directory for testing out-of-line builds + +tmp=/tmp/pcre2testing + +# Don't bother with compiler optimization for most tests; it just slows down +# compilation a lot (and running the tests themselves is quick). However, one +# special test turns optimization on, because it can provoke some compiler +# warnings. + +CFLAGS="-g" +OFLAGS="-O0" +CC="${CC:=cc}" +ISGCC=0 + +# If the compiler is gcc, add a lot of warning switches. + +$CC --version >/tmp/pcre2ccversion 2>/dev/null +if [ $? -eq 0 ] && grep GCC /tmp/pcre2ccversion >/dev/null; then + ISGCC=1 + CFLAGS="$CFLAGS -Wall" + CFLAGS="$CFLAGS -Wno-overlength-strings" + CFLAGS="$CFLAGS -Wpointer-arith" + CFLAGS="$CFLAGS -Wwrite-strings" + CFLAGS="$CFLAGS -Wundef -Wshadow" + CFLAGS="$CFLAGS -Wmissing-field-initializers" + CFLAGS="$CFLAGS -Wunused-parameter" + CFLAGS="$CFLAGS -Wextra -Wformat" + CFLAGS="$CFLAGS -Wbad-function-cast" + CFLAGS="$CFLAGS -Wmissing-declarations" + CFLAGS="$CFLAGS -Wnested-externs" + CFLAGS="$CFLAGS -pedantic" + CFLAGS="$CFLAGS -Wuninitialized" + CFLAGS="$CFLAGS -Wmaybe-uninitialized" + CFLAGS="$CFLAGS -Wmissing-prototypes" + CFLAGS="$CFLAGS -Wstrict-prototypes" + CFLAGS="$CFLAGS -Warray-bounds" + CFLAGS="$CFLAGS -Wformat-overflow=2" +fi +rm -f /tmp/pcre2ccversion + +# This function runs a single test with the set of configuration options that +# are in $opts. The source directory must be set in srcdir. The function must +# be defined as "runtest()" not "function runtest()" in order to run on +# Solaris. 
+ +runtest() + { + rm -f $srcdir/pcre2test $srcdir/pcre2grep $srcdir/pcre2_jit_test $srcdir/pcre2posix_test + testcount=`expr $testcount + 1` + + if [ "$opts" = "" ] ; then + echo "[$testcount/$testtotal] Configuring with: default settings" + else + echo "[$testcount/$testtotal] Configuring with:" + echo " $opts" + fi + + if [ $dummy -eq 1 ]; then return; fi + + CFLAGS="$CFLAGS" \ + $srcdir/configure $opts >/dev/null 2>teststderrM + if [ $? -ne 0 ]; then + echo " " + echo "******** Error while configuring ********" + cat teststderrM + exit 1 + fi + +# There is an infelicity in the Autotools world (as of October 2015) which +# causes the message +# +# ar: `u' modifier ignored since `D' is the default (see `U') +# +# to be output while linking. This triggers an unwanted error report from this +# script, because it expects no stderr output while making. To get round this +# we filter the stderr output through sed, removing all occurrences of the +# above lines. Just for paranoia, check that sed is available before doing +# this. + + echo "Making" + make -j >/dev/null 2>teststderrM + makeRC=$? + if command -v sed >/dev/null 2>&1 ; then + sed "/\`u' modifier ignored since \`D' is the default/ d" \ + teststderrM > teststderrMM + mv -f teststderrMM teststderrM + fi + if [ $makeRC -ne 0 -o -s teststderrM ]; then + echo " " + echo "******** Errors or warnings while making ********" + echo " " + cat teststderrM + exit 1 + fi + + if [ $verbose -eq 1 ]; then + ./pcre2test -C + fi + + ./pcre2test -C jit >/dev/null + jit=$? + ./pcre2test -C pcre2-8 >/dev/null + pcre2_8=$? + + echo "Running PCRE2 library tests $withvalgrind" + $srcdir/RunTest $valgrind >teststdoutM 2>teststderrM + + if [ $? 
-ne 0 -o -s teststderrM ]; then + echo " " + echo "**** Test failed ****" + if [ -s teststderrM ] ; then + cat teststderrM + else + cat teststdoutM + fi + exit 1 + fi + + if [ $pcre2_8 -gt 0 ]; then + echo "Running pcre2grep tests $withvalgrind" + $srcdir/RunGrepTest $valgrind >teststdoutM 2>teststderrM + if [ $? -ne 0 -o -s teststderrM ]; then + echo " " + echo "**** Test failed ****" + cat teststderrM + cat teststdoutM + exit 1 + fi + echo "Running pcre2posix test $withvalgrind" + $valgrind ./pcre2posix_test >teststdoutM 2>teststderrM + + if [ $? -ne 0 ]; then + echo " " + echo "**** Test failed ****" + exit 1 + fi + else + echo "Skipping pcre2grep and pcre2posix tests: 8-bit library not compiled" + fi + + if [ "$jit" -gt 0 ]; then + echo "Running JIT regression tests $withvalgrind" + $jrvalgrind ./pcre2_jit_test >teststdoutM 2>teststderrM + if [ $? -ne 0 -o -s teststderrM ]; then + echo " " + echo "**** Test failed ****" + cat teststderrM + cat teststdoutM + exit 1 + fi + else + echo "Skipping JIT regression tests: JIT is not enabled" + fi + } + +# Update the total count whenever a new test is added; it is used to show +# progress as each test is run. Valgrind groups count only when $usevalgrind is set. + +testtotal=`expr 17 \* $usemain + \ + 1 \* $usemain \* $usedebug + \ + 1 \* $usetmp + 1 \* $usetmpjit + \ + 1 \* $ISGCC \* $usemain + \ + 1 \* $ISGCC \* $usemain \* $useasan + \ + 1 \* $ISGCC \* $usemain \* $useusan + \ + 13 \* $usejit + \ + 2 \* $usevalgrind \* $usemainvalgrind + \ + 2 \* $usevalgrind \* $usejitvalgrind` + +testcount=0 + +if [ $testtotal -eq 0 ] ; then + echo "** No tests selected" + exit 1 +fi + +valgrind= +jrvalgrind= +withvalgrind= +srcdir=. +export srcdir + +if [ $usejit -ne 0 ]; then + enable_jit=--enable-jit +else + enable_jit= +fi + +# If gcc is in use, run a maximally configured test with -O2, because that can +# throw up warnings that are not detected with -O0. Then run a second test with +# -fsanitize=address, which also may throw up new warnings as well as checking +# things at runtime.
Finally, run another test using -fsanitize=undefined +# -std=gnu99 to check for runtime actions that are not well defined. + +if [ $ISGCC -ne 0 -a $usemain -ne 0 ]; then + echo "---------- Maximally configured test with -O2 ----------" + SAVECFLAGS="$CFLAGS" + CFLAGS="-O2 $CFLAGS" + echo "CFLAGS=$CFLAGS" + opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32" + runtest + if [ $useasan -ne 0 ]; then + echo "---------- Maximally configured test with -fsanitize=address ----------" +# Following a kernel change, sanitize address doesn't work unless the extra +# PIE options are also set. + CFLAGS="$OFLAGS $SAVECFLAGS -no-pie -fno-PIE -fsanitize=address" + echo "CFLAGS=$CFLAGS" + opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32" + runtest + fi +# This also seems to be the case for sanitize undefined. + if [ $useusan -ne 0 ]; then + echo "------- Maximally configured test with -fsanitize=undefined -fno-sanitize=alignment -std=gnu99 -------" + CFLAGS="$OFLAGS $SAVECFLAGS -no-pie -fno-PIE -fsanitize=undefined -fno-sanitize=alignment -std=gnu99" + echo "CFLAGS=$CFLAGS" + opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32" + runtest + fi + CFLAGS="$SAVECFLAGS" +fi + +# This set of tests builds PCRE2 and runs the tests with a variety of configure +# options, in the current (source) directory. The empty configuration builds +# with all the default settings. As well as testing that these options work, we +# use --disable-shared or --disable-static except for the default test (which +# builds both) to save a bit of time by building only one version of the +# library for the subsequent tests.
+ +echo "---------- CFLAGS for the remaining tests ----------" +CFLAGS="$OFLAGS $CFLAGS" +echo "CFLAGS=$CFLAGS" + +if [ $usemain -ne 0 ]; then + if [ $usedebug -ne 0 ]; then + echo "---------- Maximally configured test with --enable-debug ----------" + opts="--disable-shared $enable_jit --enable-pcre2-16 --enable-pcre2-32 --enable-debug" + runtest + fi + + echo "---------- Non-JIT tests in the current directory ----------" + for opts in \ + "" \ + "--disable-static" \ + "--disable-shared" \ + "--disable-unicode --disable-shared --enable-never-backslash-C" \ + "--with-link-size=3 --disable-shared --disable-pcre2grep-callout" \ + "--disable-unicode --enable-rebuild-chartables --disable-shared" \ + "--disable-unicode --enable-newline-is-any --disable-shared" \ + "--disable-unicode --enable-newline-is-cr --disable-shared" \ + "--disable-unicode --enable-newline-is-crlf --disable-shared" \ + "--disable-unicode --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" \ + "--enable-newline-is-any --disable-static" \ + "--disable-unicode --enable-pcre2-16 --enable-debug" \ + "--enable-pcre2-16 --disable-shared" \ + "--disable-unicode --enable-pcre2-32" \ + "--enable-pcre2-32 --disable-shared" \ + "--disable-unicode --enable-pcre2-32 --enable-pcre2-16 --disable-shared" \ + "--disable-unicode --enable-pcre2-32 --enable-pcre2-16 --disable-pcre2-8 --disable-shared" + do + runtest + done +fi + +# Now run the JIT tests unless disabled + +if [ $usejit -ne 0 ]; then + echo "---------- JIT tests in the current directory ----------" + for opts in \ + "--disable-unicode --enable-jit --disable-shared" \ + "--enable-jit --disable-shared" \ + "--enable-jit --with-link-size=3 --disable-shared" \ + "--enable-jit --enable-pcre2-16 --disable-shared" \ + "--disable-unicode --enable-jit --enable-pcre2-16 --disable-pcre2-8 --disable-shared" \ + "--enable-jit --enable-pcre2-16 --disable-pcre2-8 --disable-shared" \ + "--enable-jit --enable-pcre2-16 --with-link-size=3 --disable-shared" 
\ + "--enable-jit --enable-pcre2-16 --with-link-size=4 --disable-shared" \ + "--enable-jit --enable-pcre2-32 --disable-shared" \ + "--disable-unicode --enable-jit --enable-pcre2-32 --disable-pcre2-8 --disable-shared" \ + "--enable-jit --enable-pcre2-32 --disable-pcre2-8 --disable-shared" \ + "--enable-jit --enable-pcre2-32 --with-link-size=4 --disable-shared" \ + "--enable-jit --enable-pcre2-32 --enable-pcre2-16 --disable-pcre2-8 --enable-newline-is-anycrlf --enable-bsr-anycrlf --disable-shared" + do + runtest + done +fi + +# Now re-run some of the tests under valgrind. + +if [ $usevalgrind -ne 0 ]; then + echo "---------- Tests in the current directory using valgrind ----------" + valgrind=valgrind + withvalgrind="with valgrind" + + if [ $usemainvalgrind -ne 0 ]; then + for opts in \ + "--disable-shared" \ + "--with-link-size=3 --enable-pcre2-16 --enable-pcre2-32 --disable-shared" + do + opts="--enable-valgrind $opts" + runtest + done + fi + + if [ $usejitvalgrind -ne 0 ]; then + jrvalgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --suppressions=$srcdir/testdata/valgrind-jit.supp" + for opts in \ + "--enable-jit --disable-shared" \ + "--enable-jit --enable-pcre2-16 --enable-pcre2-32" + do + opts="--enable-valgrind $opts" + runtest + done + fi +fi + +valgrind= +jrvalgrind= +withvalgrind= + +# Clean up the distribution and then do at least one build and test in a +# directory other than the source directory. It doesn't work unless the +# source directory is cleaned up first. + +if [ -f Makefile ]; then + echo "Running 'make distclean'" + make distclean >/dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "** 'make distclean' failed" + exit 1 + fi +fi + +echo "---------- End of tests in the source directory ----------" +echo "Removing teststdoutM and teststderrM" +rm -rf teststdoutM teststderrM + +if [ $usetmp -ne 0 -o $usetmpjit -ne 0 ]; then + srcdir=`pwd` + export srcdir + + if [ ! -e $tmp ]; then + mkdir $tmp + fi + + if [ ! 
-d $tmp ]; then + echo "** Failed to create $tmp or it is not a directory" + exit 1 + fi + + cd $tmp + if [ $? -ne 0 ]; then + echo "** Failed to cd to $tmp" + exit 1 + fi + + if [ $usetmp -ne 0 ]; then + echo "---------- Tests in the $tmp directory ----------" + for opts in \ + "--disable-shared" + do + runtest + done + fi + + if [ $usetmpjit -ne 0 ]; then + echo "---------- JIT tests in the $tmp directory ----------" + for opts in \ + "--enable-jit --disable-shared" + do + runtest + done + fi + + echo "Removing $tmp" + rm -rf $tmp +fi + +echo "---------- All done ----------" + +# End diff --git a/3rd/pcre2/maint/PrepareRelease b/3rd/pcre2/maint/PrepareRelease new file mode 100644 index 00000000..2a375820 --- /dev/null +++ b/3rd/pcre2/maint/PrepareRelease @@ -0,0 +1,330 @@ +#! /bin/bash + +# Script to prepare the files for building a PCRE2 release. It does some +# processing of the documentation and detrails files. + +# You must run this script before running "make dist". If its first argument +# is "doc", it stops after preparing the documentation. There are no other +# arguments. The script makes use of the following files: + +# 132html A Perl script that converts a .1 or .3 man page into HTML. It +# "knows" the relevant troff constructs that are used in the PCRE2 +# man pages. + +# CheckMan A Perl script that checks man pages for typos in the mark up. + +# CleanTxt A Perl script that cleans up the output of "nroff -man" by +# removing backspaces and other redundant text so as to produce +# a readable .txt file. + +# Detrail A Perl script that removes trailing spaces from files. + +# doc/index.html.src +# A file that is copied as index.html into the doc/html directory +# when the HTML documentation is built. It works like this so that +# doc/html can be deleted and re-created from scratch. 
+ +# README & NON-AUTOTOOLS-BUILD +# These files are copied into the doc/html directory, with .txt +# extensions so that they can be hyperlinked from the HTML +# documentation, because some people just go to the HTML without +# looking for text files. + +# Set the LANG to C, because nroff converts ASCII "HYPHEN-MINUS" to Unicode +# "HYPHEN" if the system is using a UTF-8 locale (like "C.UTF-8"). +export LANG=C + +# Extract the current release version from configure.ac. +CURRENT_RELEASE=`grep -E 'm4_define\(pcre2_(major|minor|prerelease)' configure.ac | \ + grep -E -o '\[.*\]' | \ + sed -E -e '1s/$/./' | \ + tr -d '[]\n'` + +# First, sort out the documentation. Remove pcre2demo.3 first because it won't +# pass the markup check (it is created below, using markup that none of the +# other pages use). + +cd doc +echo Processing documentation + +/bin/rm -f pcre2demo.3 + +# Check the remaining man pages + +perl ../maint/CheckMan *.1 *.3 +if [ $? != 0 ] ; then exit 1; fi + +# Verify the version number in the man pages + +for file in *.1 *.3 ; do + if ! grep -E ".TH.*\"PCRE2 $CURRENT_RELEASE\"" "$file" >/dev/null ; then + echo "Version number in $file does not match current release" + exit 1 + fi +done + +# Make Text form of the documentation. It needs some mangling to make it +# tidy for online reading. Concatenate all the .3 stuff, but omit the +# individual function pages. + +cat <<End >pcre2.txt +----------------------------------------------------------------------------- +This file contains a concatenation of the PCRE2 man pages, converted to plain +text format for ease of searching with a text editor, or for use on systems +that do not have a man page processor. The small individual files that give +synopses of each function in the library have not been included. Neither has +the pcre2demo program. There are separate text files for the pcre2grep and +pcre2test commands. 
+----------------------------------------------------------------------------- + + +End + +echo "Making pcre2.txt" +for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \ + pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform \ + pcre2posix pcre2sample pcre2serialize pcre2syntax \ + pcre2unicode ; do + echo " Processing $file.3" + nroff -c -man $file.3 >$file.rawtxt + perl ../maint/CleanTxt <$file.rawtxt >>pcre2.txt + /bin/rm $file.rawtxt + echo "------------------------------------------------------------------------------" >>pcre2.txt + if [ "$file" != "pcre2sample" ] ; then + echo "" >>pcre2.txt + echo "" >>pcre2.txt + fi +done + +# The three commands +for file in pcre2test pcre2grep pcre2-config ; do + echo Making $file.txt + nroff -c -man $file.1 >$file.rawtxt + perl ../maint/CleanTxt <$file.rawtxt >$file.txt + /bin/rm $file.rawtxt +done + + +# Make pcre2demo.3 from the pcre2demo.c source file + +echo "Making pcre2demo.3" +perl <<"END" >pcre2demo.3 + use Time::Piece; + open(VH, "<", "../src/config.h.generic") || die "Failed to open src/config.h.generic\n"; + open(IN, "<", "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n"; + open(OUT, ">", "pcre2demo.3") || die "Failed to open pcre2demo.3\n"; + my $version; + while (<VH>) + { + chomp; + if ( /^#define PACKAGE_STRING "([^"]+)"/ ) { $version = $1 ; last } + } + my $t = `git log -n1 --date=format:"%d %B %Y" --format=%cd ../src/pcre2demo.c`; + chomp $t; + print OUT ".TH PCRE2DEMO 3 \"", $t, '" "', $version, "\"\n" . + ".\\\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!\n" . + ".SH NAME\n" . + "PCRE2DEMO - A demonstration C program for PCRE2\n" . + ".SH \"SOURCE CODE\"\n" . + ".rs\n" . + ".sp\n" . + ".\\\" Start example.\n" . + ".de EX\n" . + ". do ds mF \\\\n[.fam]\n" . + ". nr mE \\\\n(.f\n" . + ". nf\n" . + ". nh\n" . + ". do fam C\n" . + ". ft CW\n" . + "..\n" . + ".\n" . + ".\n" . + ".\\\" End example.\n" . + ".de EE\n" . + ". 
do fam \\\\*(mF\n" . + ". 
ft \\\\n(mE\n" . + ". fi\n" . + ". hy \\\\n(HY\n" . + "..\n" . + ".\n" . + ".RS -7\n" . + ".EX\n" ; + while (<IN>) + { + s/\\/\\e/g; + print OUT; + } + print OUT ".EE\n"; + close(IN); + close(OUT); +END +if [ $? != 0 ] ; then exit 1; fi + + +# Verify that `man` can process the pages without warnings. + +for file in *.1 *.3 ; do + MAN_OUT=`MANROFFSEQ='' MANWIDTH=80 man --warnings=w,all -E UTF-8 -l -Tutf8 -Z "$file" 2>&1 >/dev/null` + if [ "$MAN_OUT" != "" ]; then + printf "Running man generated warnings:\n%s\n" "$MAN_OUT" + exit 1 + fi +done + + +# Make HTML form of the documentation. + +echo "Making HTML documentation" +/bin/rm html/* +cp index.html.src html/index.html +cp ../README html/README.txt +cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt + +for file in *.1 ; do + base=`basename $file .1` + echo " Making $base.html" + perl ../maint/132html -toc $base <$file >html/$base.html +done + +# Exclude table of contents for function summaries. It seems that expr +# forces an anchored regex. Also exclude them for small pages that have +# only one section. + +for file in *.3 ; do + base=`basename $file .3` + toc=-toc + if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi + if [ "$base" = "pcre2sample" ] || \ + [ "$base" = "pcre2compat" ] || \ + [ "$base" = "pcre2demo" ] || \ + [ "$base" = "pcre2limits" ] || \ + [ "$base" = "pcre2unicode" ] ; then + toc="" + fi + echo " Making $base.html" + perl ../maint/132html $toc $base <$file >html/$base.html + if [ $? != 0 ] ; then exit 1; fi +done + +# End of documentation processing; stop if only documentation required. + +cd .. +echo Documentation done +if [ "$1" = "doc" ] ; then exit; fi + +# These files are detrailed; do not detrail the test data because there may be +# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF +# line endings and the detrail script removes all trailing white space. The +# configure files are also omitted from the detrailing. 
+ +txt_files=( + AUTHORS.md + BUILD.bazel + CMakeLists.txt + COPYING + ChangeLog + HACKING + INSTALL + LICENCE.md + MODULE.bazel + Makefile.am + NEWS + NON-AUTOTOOLS-BUILD + README + RunGrepTest + RunTest + SECURITY.md + WORKSPACE.bazel + build.zig + configure.ac + libpcre2-8.pc.in + libpcre2-16.pc.in + libpcre2-32.pc.in + libpcre2-posix.pc.in + pcre2-config.in + perltest.sh + cmake/COPYING-CMAKE-SCRIPTS + cmake/{*.cmake,*.cmake.in} + m4/ax_pthread.m4 + m4/pcre2_visibility.m4 + doc/p* + doc/html/* + ) + +crlf_files=( + RunGrepTest.bat + RunTest.bat + ) + +c_files=( + config-cmake.h.in + src/pcre2.h.in + src/pcre2_auto_possess.c + src/pcre2_chartables.c.dist + src/pcre2_chkdint.c + src/pcre2_compile.c + src/pcre2_compile.h + src/pcre2_compile_class.c + src/pcre2_config.c + src/pcre2_context.c + src/pcre2_convert.c + src/pcre2_dfa_match.c + src/pcre2_dftables.c + src/pcre2_error.c + src/pcre2_extuni.c + src/pcre2_find_bracket.c + src/pcre2_fuzzsupport.c + src/pcre2_internal.h + src/pcre2_intmodedep.h + src/pcre2_jit_char_inc.h + src/pcre2_jit_compile.c + src/pcre2_jit_match.c + src/pcre2_jit_misc.c + src/pcre2_jit_neon_inc.h + src/pcre2_jit_simd_inc.h + src/pcre2_jit_test.c + src/pcre2_maketables.c + src/pcre2_match.c + src/pcre2_match_data.c + src/pcre2_newline.c + src/pcre2_ord2utf.c + src/pcre2_pattern_info.c + src/pcre2_printint.c + src/pcre2_script_run.c + src/pcre2_serialize.c + src/pcre2_string_utils.c + src/pcre2_study.c + src/pcre2_substitute.c + src/pcre2_substring.c + src/pcre2_tables.c + src/pcre2_ucd.c + src/pcre2_ucp.h + src/pcre2_ucptables.c + src/pcre2_util.h + src/pcre2_valid_utf.c + src/pcre2_xclass.c + src/pcre2demo.c + src/pcre2grep.c + src/pcre2posix.c + src/pcre2posix.h + src/pcre2posix_test.c + src/pcre2test.c + ) + +echo Detrailing +perl maint/Detrail "${txt_files[@]}" "${c_files[@]}" + +echo Validating all text +perl maint/CheckTxt "${txt_files[@]}" +perl maint/CheckTxt -ascii "${c_files[@]}" +perl maint/CheckTxt -crlf "${crlf_files[@]}" + +# 
Verify the version number in the Bazel file +if ! grep -E "version = \"$CURRENT_RELEASE\"" MODULE.bazel >/dev/null ; then + echo "Version number in MODULE.bazel does not match current release" + exit 1 +fi + +echo Done + +#End diff --git a/3rd/pcre2/maint/README b/3rd/pcre2/maint/README new file mode 100644 index 00000000..7b912996 --- /dev/null +++ b/3rd/pcre2/maint/README @@ -0,0 +1,506 @@ +MAINTENANCE README FOR PCRE2 +============================ + +The files in the "maint" directory of the PCRE2 source contain data, scripts, +and programs that are used for the maintenance of PCRE2, but which do not form +part of the PCRE2 distribution tarballs. This document describes these files +and also contains some notes for maintainers. Its contents are: + + Files in the maint directory + Updating to a new Unicode release + Preparing for a PCRE2 release + Making a PCRE2 release + Future ideas (wish list) + +For a description of the way PCRE2 works, see the file called HACKING in the +top directory. + + +Files in the maint directory +============================ + +132html + A Perl script to convert man pages to HTML (.1 and .3 files "two" HTML), + used by PrepareRelease. + +CheckMan + A Perl script to validate the syntax in PCRE2 man pages, used by + PrepareRelease. + +CleanTxt + A Perl script to clean up the nroff output in PCRE2 man pages, used by + PrepareRelease. + +Detrail + A Perl script to remove trailing whitespace from PCRE2 files, used by + PrepareRelease. + +GenerateCommon.py + A Python module containing data and functions that are used by the other + Generate scripts. + +GenerateTest.py + A Python script that generates input and expected output test data for tests + 26 or 27, which tests certain aspects of Unicode property support. + +GenerateUcd.py + A Python script that generates the file pcre2_ucd.c from GenerateCommon.py + and Unicode data files, which are themselves downloaded from the Unicode web + site. 
The generated file contains the tables for a 2-stage lookup of Unicode + properties, along with some auxiliary tables. The script starts with a long + comment that gives details of the tables it constructs. + +GenerateUcpHeader.py + A Python script that generates the file pcre2_ucp.h from GenerateCommon.py + and Unicode data files. The generated file defines constants for various + Unicode property values. + +GenerateUcpTables.py + A Python script that generates the file pcre2_ucptables.c from + GenerateCommon.py and Unicode data files. The generated file contains tables + for looking up Unicode property names. + +manifest-* + Data files used to verify the contents of the distribution tarball and + `make install` file lists. + +ManyConfigTests + A shell script that runs "configure, make, test" a number of times with + different configuration settings. + +PrepareRelease + A shell script to ensure that all auto-generated outputs are ready for + release. + +pcre2_chartables.c.non-standard + This is a set of character tables that came from a Windows system. It has + characters greater than 128 that are set as spaces, amongst other things. I + kept it so that it can be used for testing from time to time. + +README + This file. + +RunManifestTest +RunManifestTest.ps1 + Scripts to generate and verify a list of files against an expected 'manifest' + detailing what the directory should contain. + +Unicode.tables + The files in this directory were downloaded from the Unicode web site. They + contain information about Unicode characters and scripts, and are used by the + Generate scripts. There is also UnicodeData.txt, which is no longer used by + any script, because it is useful occasionally for manually looking up the + details of certain characters. However, note that character names in this + file such as "Arabic sign sanah" do NOT mean that the character is in a + particular script (in this case, Arabic). 
Scripts.txt and + ScriptExtensions.txt are where to look for script information. + +ucptest.c + A program for testing the Unicode property macros that do lookups in the + pcre2_ucd.c data, mainly useful after rebuilding the Unicode property tables. + Compile and run this in the "maint" directory (see comments at its head). + This program can also be used to find characters with specific properties and + to list which properties are supported. + +ucptestdata + A directory containing four files, testinput{1,2} and testoutput{1,2}, for + use in conjunction with the ucptest program. + +utf8.c + A short, freestanding C program for converting a Unicode code point into a + sequence of bytes in the UTF-8 encoding, and vice versa. If its argument is a + hex number such as 0x1234, it outputs a list of the equivalent UTF-8 bytes. + If its argument is a sequence of concatenated UTF-8 bytes (e.g. 12e188b4) it + treats them as a UTF-8 string and outputs the equivalent code points in hex. + See comments at its head for details. + + +Updating to a new Unicode release +================================= + +When there is a new release of Unicode, the files in Unicode.tables must be +refreshed from the Unicode web site. Once that is done, the four Python scripts +that generate files from the Unicode data can be run from within the "maint" +directory. Note that the format used for those files is not stable, and +therefore changes to the scripts might be needed to support new versions. + +Note: Previously, it was necessary to update lists of scripts and their +abbreviations by hand before running the Python scripts. This is no longer +necessary because the scripts have been upgraded to extract this information +themselves. Also, there used to be explicit lists of scripts in two of the man +pages. This is no longer the case; the pcre2test program can now output a list +of supported scripts, and the command to do so is part of the documentation. 
+ +You can give an output file name as an argument to the following scripts, but +by default: + +GenerateUcd.py creates pcre2_ucd.c ) +GenerateUcpHeader.py creates pcre2_ucp.h ) in the current directory +GenerateUcpTables.py creates pcre2_ucptables.c ) + +These files can be compared against the existing versions in the src directory +to check on any changes before replacing the old files, but you can also +generate directly into the final location by running: + +./GenerateUcd.py ../src/pcre2_ucd.c +./GenerateUcpHeader.py ../src/pcre2_ucp.h +./GenerateUcpTables.py ../src/pcre2_ucptables.c + +Once the .c and .h files are in the ../src directory, the ucptest program can +be compiled and used to check that the new tables work properly. The data files +in ucptestdata are set up to check a number of test characters. See the +comments at the start of ucptest.c. Depending on the type of changes, adding +tests for new scripts, properties or characters to the files in ucptestdata +is recommended. Make sure to regenerate and validate the output files afterwards. + +Finally, you should run the GenerateTest.py script to regenerate new versions +of the input and expected output from a series of Unicode property tests that +are automatically generated from the Unicode data files. By default, the files +are written to testinput and testoutput in the current directory, but they +should be moved to replace the files inside the main testdata directory +that are being used for tests 27 or 26. 
+ +In summary: + +``` +./GenerateUcd.py ../src/pcre2_ucd.c +./GenerateUcpHeader.py ../src/pcre2_ucp.h +./GenerateUcpTables.py ../src/pcre2_ucptables.c +./GenerateTest.py +mv testinput ../testdata/testinput27 +mv testoutput ../testdata/testoutput27 + +...compile ucptest.c +for i in 1 2; do + ./ucptest < ucptestdata/testinput$i > testoutput$i + diff -U3 testoutput$i ucptestdata/testoutput$i +done +``` + + +Preparing for a PCRE2 release +============================= + +This section contains a checklist of things that I (PH) do before building a +new release. + +. Ensure that the version number and version date are correct in configure.ac. + +. Update the library version numbers in configure.ac according to the rules + given below. + +. If new build options or new source files have been added, ensure that they + are added to the CMake files as well as to the autoconf files. The relevant + files are CMakeLists.txt and config-cmake.h.in. After making a release, test + it out with CMake if there have been changes here. + +. Run ./autogen.sh to ensure everything is up-to-date. + +. Run the script maint/ManyConfigTests. This compiles and runs the tests for + many different sets of configuration options, some with valgrind. It can take + quite a long time. + +. Run tests in both 32-bit and 64-bit environments if possible. I can no longer + run 32-bit tests. + +. Run tests with two or more different compilers (e.g. clang and gcc), and make + use of -fsanitize=address and friends where possible. For gcc, + -fsanitize=undefined -std=gnu99 picks up undefined behaviour at runtime. For + clang, -fsanitize=address,undefined,integer can be used but an exception is + needed to allow XCLASS with very large ranges in the 32-bit library so it + should be followed by -fno-sanitize=unsigned-shift-base, additionally + -fno-sanitize=unsigned-integer-overflow must be added when compiling with + JIT. 
Newer versions of clang also need -fno-sanitize=function, at least + until pcre2test stops using generic pointers on its callbacks. Another + useful clang option is -fsanitize=signed-integer-overflow but that should + be already included if using "integer". + +. Do a test build using CMake. Remove src/config.h first, lest it override the + version that CMake creates. Also ensure there is no leftover CMakeCache.txt + in the directory you are testing in. + +. Remove the CMake cache and then check that a CMake unity build works: + [c]cmake -DCMAKE_UNITY_BUILD=ON sets up a unity build. + +. Run perltest.sh on the test data for tests 1 and 4. The output should match + the PCRE2 test output, apart from the version identification at the start of + each test. Sometimes there are other differences in test 4 if PCRE2 and Perl + are using different Unicode releases. The other tests are not Perl-compatible + (they use various PCRE2-specific features or options). The maint/RunPerlTest + shell script can be used to do this testing in Unix-like environment. + +. It is possible to test with the emulated memmove() function by undefining + HAVE_MEMMOVE and HAVE_BCOPY in config.h, though I do not do this often. + +. Documentation: check AUTHORS, ChangeLog (check version and date), LICENCE, + NEWS (check version and date), NON-AUTOTOOLS-BUILD, and README. Many of these + won't need changing, but over the long term things do change. + +. I used to test new releases myself on a number of different operating + systems. For example, on Solaris it is helpful to test using Sun's cc + compiler as a change from gcc. Adding -m64 to the cc options does a 64-bit + build. Since I retired I can no longer do much of this. There are automated + tests under Ubuntu, Alpine, macOS and Windows that are now set up as GitHub + actions. Check that they are running clean. + +. 
The buildbots at http://buildfarm.opencsw.org/ do some automated testing + of PCRE2 and should also be checked before putting out a release. (June 2024: + I am not sure these are currently working properly.) + + +Updating version info for libtool +================================= + +This set of rules for updating library version information came from a web page +whose URL I have forgotten. The version information consists of three parts: +(current, revision, age). + +1. Start with version information of 0:0:0 for each libtool library. + +2. Update the version information only immediately before a public release of + your software. More frequent updates are unnecessary, and only guarantee + that the current interface number gets larger faster. + +3. If the library source code has changed at all since the last update, then + increment revision; c:r:a becomes c:r+1:a. + +4. If any interfaces have been added, removed, or changed since the last + update, increment current, and set revision to 0. + +5. If any interfaces have been added since the last public release, then + increment age. + +6. If any interfaces have been removed or changed since the last public + release, then set age to 0. + +The following explanation may help in understanding the above rules a bit +better. Consider that there are three possible kinds of reaction from users to +changes in a shared library: + +1. Programs using the previous version may use the new version as a drop-in + replacement, and programs using the new version can also work with the + previous one. In other words, no recompiling nor relinking is needed. In + this case, increment revision only, don't touch current or age. + +2. Programs using the previous version may use the new version as a drop-in + replacement, but programs using the new version may use APIs not present in + the previous one. In other words, a program linking against the new version + may fail if linked against the old version at run time. 
In this case, set + revision to 0, increment current and age. + +3. Programs may need to be changed, recompiled, relinked in order to use the + new version. Increment current, set revision and age to 0. + + +Making a PCRE2 release +====================== + +Run PrepareRelease and commit the files that it changes. The first thing this +script does is to run CheckMan on the man pages; if it finds any markup errors, +it reports them and then aborts. Otherwise it removes trailing spaces from +sources and refreshes the HTML documentation. Update the GitHub repository. + +Once PrepareRelease has run clean, run "make distcheck" to create the tarballs +and the zipball. I then sign these files. Double-check with "git status" that +the repository is fully up-to-date, then create a new tag and a release on +GitHub. Upload the tarballs, zipball, and the signatures as "assets" of the +GitHub release. + + +Future ideas (wish list) +======================== + +This section records a list of ideas so that they do not get forgotten. They +vary enormously in their usefulness and potential for implementation. Some are +very sensible; some are rather wacky. Some have been on this list for many +years. + +. Optimization + + There are always ideas for new optimizations so as to speed up pattern + matching. Most of them try to save work by recognizing a non-match without + having to scan all the possibilities. These are some that I've recorded: + + * /((A{0,5}){0,5}){0,5}(something complex)/ on a non-matching string is very + slow, though Perl is fast. Can we speed up somehow? Convert to {0,125}? + OTOH, this is pathological - the user could easily fix it. + + * Turn ={4} into ==== ? (for speed). I once did an experiment, and it seems + to have little effect, and maybe makes things worse. + + * "Ends with literal string" - note that a single character doesn't gain much + over the existing "required code unit" feature that just remembers one code + unit. 
+ + * Remember an initial string rather than just 1 code unit. + + * A required code unit from alternatives - not just the last unit, but an + earlier one if common to all alternatives. + + * Friedl contains other ideas. + + * The code does not set initial code unit flags for Unicode property types + such as \p; I don't know how much benefit there would be for, for example, + setting the bits for 0-9 and all values >= xC0 (in 8-bit mode) when a + pattern starts with \p{N}. + +. Perl and PCRE2 sometimes differ in the settings of capturing subpatterns + inside repeats. One example of the difference is the matching of + /(main(O)?)+/ against mainOmain, where PCRE2 leaves $2 set. In Perl, it's + unset. Changing this in PCRE2 will be very hard because I think it needs much + more state to be remembered. + +. A feature to suspend a match via a callout was once requested. + +. An option to convert results into character offsets and character lengths. + +. A (non-Unix) user wanted pcregrep options to (a) list a file name just once, + preceded by a blank line, instead of adding it to every matched line, and (b) + support --outputfile=name. + +. Define a union for the results from pcre2_pattern_info(). + +. Provide a "random access to the subject" facility so that the way in which it + is stored is independent of PCRE2. For efficiency, it probably isn't possible + to switch this dynamically. It would have to be specified when PCRE2 was + compiled. PCRE2 would then call a function every time it wanted a character. + +. pcre2grep: add -rs for a sorted recurse. Having to store file names and sort + them will of course slow it down. + +. Someone suggested --disable-callout to save code space when callouts are + never wanted. This seems rather marginal. + +. A user suggested a parameter to limit the length of string matched, for + example if the parameter is N, the current match should fail if the matched + substring exceeds N. This could apply to both match functions. 
The value + could be a new field in the match context. Compare the offset_limit feature, + which limits where a match must start. + +. Write a function that generates random matching strings for a compiled + pattern. + +. Pcre2grep: an option to specify the output line separator, either as a string + or select from a fixed list. This is not straightforward, because at the + moment it outputs whatever is in the input file. + +. Improve the code for duplicate checking in pcre2_dfa_match(). An incomplete, + non-thread-safe patch showed that this can help performance for patterns + where there are many alternatives. However, a simple thread-safe + implementation that I tried made things worse in many simple cases, so this + is not an obviously good thing. + +. PCRE2 cannot at present distinguish between subpatterns with different names, + but the same number (created by the use of ?|). In order to do so, a way of + remembering *which* subpattern numbered n matched is needed. (*MARK) can + perhaps be used as a way round this problem. However, note that Perl does not + distinguish: like PCRE2, a name is just an alias for a number in Perl. + +. Instead of having #ifdef HAVE_CONFIG_H in each module, put #include + "something" and then the #ifdef appears only in one place, in "something". + +. Implement something like (?(R2+)... to check outer recursions. + +. If Perl ever supports the POSIX notation [[.something.]] PCRE2 should try + to follow. + +. A user wanted a way of ignoring all Unicode "mark" characters so that, for + example "a" followed by an accent would, together, match "a". This can only + be done clumsily at present by using a lookahead such as /(?=a)\X/, which + works for "combining" characters. + +. Perl supports [\N{x}-\N{y}] as a Unicode range, even in EBCDIC. PCRE2 + supports \N{U+dd..} everywhere, but not in EBCDIC. + +. 
Unicode stuff from Perl: + + \b{gcb} or \b{g} grapheme cluster boundary + \b{sb} sentence boundary + \b{wb} word boundary + + See Unicode TR 29. The last two are very much aimed at natural language. + +. Allow a callout to specify a number of characters to skip. This can be done + compatibly via an extra callout field. + +. Allow callouts to return *PRUNE, *COMMIT, *THEN, *SKIP, with and without + continuing (that is, with and without an implied *FAIL). A new option, + PCRE2_CALLOUT_EXTENDED say, would be needed. This is unlikely ever to be + implemented by JIT, so this could be an option for pcre2_match(). + +. A limit on substitutions: a user suggested somehow finding a way of making + match_limit apply to the whole operation instead of each match separately. + +. Some #defines could be replaced with enums to improve robustness. + +. There was a request for an option for pcre2_match() to return the longest + match. This would mean searching for all possible matches, of course. + +. A neater way of handling recursion file names in pcre2grep, e.g. a single + buffer that can grow. See also GitHub issue #2 (recursion looping via + symlinks). + +. A user suggested that before/after parameters in pcre2grep could have + negative values, to list lines near to the matched line, but not necessarily + the line itself. For example, --before-context=-1 would list the line *after* + each matched line, without showing the matched line. The problem here is what + to do with matches that are close together. Maybe a simpler way would be a + flag to disable showing matched lines, only valid with either -A or -B? + +. There was a suggestion for a pcre2grep colour default, or possibly a more + general PCRE2GREP_OPT, but only for some options - not file names or patterns. + +. Breaking loops that match an empty string: perhaps find a way of continuing + if *something* has changed, but this might mean remembering additional data. 
+ "Something" could be a capture value, but then a list of previous values + would be needed to avoid a cycle of changes. + +. If a function could be written to find 3-character (or other length) fixed + strings, at least one of which must be present for a match, efficient + pre-searching of large datasets could be implemented. + +. If pcre2grep had --first-line (match only in the first line) it could be + efficiently used to find files "starting with xxx". What about --last-line? + There was also the suggestion of an option for pcre2grep to scan only the + start of a file. I am not keen - this is the job of "head". + +. A user requested a means of determining whether a failed match was failed by + the start-of-match optimizations, or by running the match engine. Easy enough + to define a bit in the match data, but all three matchers would need work. + +. Would inlining "simple" recursions provide a useful performance boost for the + interpreters? JIT already does some of this, but it may not be worth it for + the interpreters. + +. Redesign handling of class/nclass/xclass because the compile code logic is + currently very contorted and obscure. Also there was a request for a way of + re-defining \w (and therefore \W, \b, and \B). An in-pattern sequence such as + (?w=[...]) was suggested. Easiest way would be simply to inline the class, + with lookarounds for \b and \B. Ideally the setting should last till the end + of the group, which means remembering all previous settings; maybe a fixed + amount of stack would do - how deep would anyone want to nest these things? + +. A user suggested something like --with-build-info to set a build information + string that could be retrieved by pcre2_config(). However, there's no + facility for a length limit in pcre2_config(), and what would be the + encoding? + +. Quantified groups with a fixed count currently operate by replicating the + group in the compiled bytecode. 
This may not really matter in these days of + gigabyte memory, but perhaps another implementation might be considered. + Needs coordination between the interpreters and JIT. + +. The POSIX interface is no longer POSIX compatible, because regoff_t is still + defined as an int. + +. The POSIX interface is not thread safe because it modifies a pcre2_match + inside its regex_t while doing matching. A thread safe version that uses + a thread local object has been proposed but it will require that the code + requires at least C11 compatibility. + +. See also any suggestions in the GitHub issues. + +Philip Hazel +Email local part: Philip.Hazel +Email domain: gmail.com +Last updated: 22 August 2024 diff --git a/3rd/pcre2/maint/RunManifestTest b/3rd/pcre2/maint/RunManifestTest new file mode 100644 index 00000000..e7a082ca --- /dev/null +++ b/3rd/pcre2/maint/RunManifestTest @@ -0,0 +1,47 @@ +#! /bin/sh + +# Script to test a directory listing. We use this to verify that the list of +# files installed by "make install" or "cmake --install" matches what we expect. + +LANG=C # Ensure stable ordering of `sort` output +export LANG + +if [ "$1" = "" -o "$2" = "" ] ; then + echo "Usage: $0 " >&2 + exit 1 +fi + +input_dir="$1" +expected_manifest="$2" + +base=`basename $expected_manifest` + +sed=sed +grep=grep +# Helpers for Solaris +if [ -f /usr/bin/gsed ] ; then + sed=/usr/bin/gsed +fi +if [ -f /usr/bin/ggrep ] ; then + grep=/usr/bin/ggrep +fi + +find "$input_dir" -print | \ + sort | \ + xargs -n1 -- ls -l -d -n | \ + $sed -E -e 's/ {2,}/ /g' | \ + cut -d' ' -f '1,9-' \ + > "$base" + +if ! 
diff -u "$expected_manifest" "$base"; then + echo "Installed files differ from expected" >&2 + + echo "===Actual===" >&2 + cat "$base" >&2 + echo "===End===" >&2 + + exit 1 +fi + +echo "Installed files match expected" +rm -f "$base" diff --git a/3rd/pcre2/maint/RunManifestTest.ps1 b/3rd/pcre2/maint/RunManifestTest.ps1 new file mode 100644 index 00000000..796d11fd --- /dev/null +++ b/3rd/pcre2/maint/RunManifestTest.ps1 @@ -0,0 +1,36 @@ +# Script to test a directory listing. We use this to verify that the list of +# files installed by "make install" or "cmake --install" matches what we expect. + +param ( + [Parameter(Mandatory=$true)] + [string]$inputDir, + + [Parameter(Mandatory=$true)] + [string]$manifestName +) + +if ((-not $inputDir) -or (-not $manifestName)) { + throw "Usage: .\RunManifestTest.ps1 " +} + +$base = [System.IO.Path]::GetFileName($manifestName) + +$installedFiles = Get-ChildItem -Recurse -Force -Path $inputDir | + Sort-Object {[System.BitConverter]::ToString([system.Text.Encoding]::UTF8.GetBytes($_.FullName))} | + ForEach-Object { $_.Mode.Substring(0,5) + " " + ($_.FullName | Resolve-Path -Relative) } + +$null = New-Item -Force $base -Value (($installedFiles | Out-String) -replace "`r`n", "`n") + +$expectedFiles = Get-Content -Path $manifestName -Raw +$actualFiles = Get-Content -Path $base -Raw + +if ($expectedFiles -ne $actualFiles) { + Write-Host "===Actual===" + Write-Host $actualFiles + Write-Host "===End===" + + throw "Installed files differ from expected" +} + +Write-Host "Installed files match expected" +Remove-Item -Path $base -Force diff --git a/3rd/pcre2/maint/RunPerlTest b/3rd/pcre2/maint/RunPerlTest new file mode 100644 index 00000000..fad93132 --- /dev/null +++ b/3rd/pcre2/maint/RunPerlTest @@ -0,0 +1,90 @@ +#! /bin/sh + +# Script to run the Perl-compatible PCRE2 tests through Perl. 
For testing +# with different versions of Perl, if the first argument is "-perl" then the +# second is taken as the Perl command to use, and both are then removed. +# +# The argument can be the number of the specific Perl compatible test to run +# (ex: "1", "4", "26" or "27"), otherwise it runs all tests and returns at +# exit, the test number with an incorrect output or the test number plus 32 +# if it failed to run completely. It returns with 0 on success. + +# This script should be run with the main PCRE2 directory current. + +if [ "$1" = "-perl" ]; then + PERL="$2" + ARGS="$1 $PERL" + shift 2 +else + PERL=perl + ARGS="" +fi + +RC=0 + +if [ -z "$1" ] || [ "$1" = "1" ]; then +echo "-----------------------------------------------------------------" +echo "Perl test: main functionality (PCRE2 test 1)" +if ./perltest.sh $ARGS testdata/testinput1 testtry; then + tail -n +2 testtry > testtry2 + diff -u testdata/testoutput1 testtry2 || RC=1 + /bin/rm -rf testtry2 +else + RC=33 +fi +echo "" +fi + +if [ -z "$1" ] || [ "$1" = "4" ]; then +echo "-----------------------------------------------------------------" +echo "Perl test: UTF-8 and Unicode property features (PCRE2 test 4)" +if ./perltest.sh $ARGS -utf8 testdata/testinput4 testtry; then + tail -n +2 testtry > testtry2 + diff -u testdata/testoutput4 testtry2 || RC=4 + /bin/rm -rf testtry2 +else + RC=36 +fi +echo "" +fi + +P=$($PERL -MUnicode::UCD -e 'print Unicode::UCD::UnicodeVersion, "\n"') + +if [ -z "$1" ] || [ "$1" = "26" ]; then +echo "-----------------------------------------------------------------" +echo "Perl test: Unicode property tests (PCRE2 test 26)" +U=$(head -5 testdata/testinput26 | $PERL -ne 'print "$1\n" if /tests for version ([\d.]+)$/') +if [ "$U" != "$P" ]; then + echo "SKIPPED: Perl uses Unicode $P but version $U was expected" +else + if ./perltest.sh $ARGS testdata/testinput26 testtry; then + tail -n +2 testtry > testtry2 + diff -u testdata/testoutput26 testtry2 || RC=26 + /bin/rm -rf testtry2 
+ else + RC=58 + fi + echo "" + fi +fi + +if [ -z "$1" ] || [ "$1" = "27" ]; then +echo "-----------------------------------------------------------------" +echo "Perl test: Unicode property tests (PCRE2 test 27)" +U=$(head -5 testdata/testinput27 | $PERL -ne 'print "$1\n" if /tests for version ([\d.]+)$/') +if [ "$U" != "$P" ]; then + echo "SKIPPED: Perl uses Unicode $P but version $U was expected" +else + if ./perltest.sh $ARGS testdata/testinput27 testtry; then + tail -n +2 testtry > testtry2 + diff -u testdata/testoutput27 testtry2 || RC=27 + /bin/rm -rf testtry2 + else + RC=59 + fi + echo "" + fi +fi + +exit $RC +# End diff --git a/3rd/pcre2/maint/Unicode.tables/BidiMirroring.txt b/3rd/pcre2/maint/Unicode.tables/BidiMirroring.txt new file mode 100644 index 00000000..d8f60cb7 --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/BidiMirroring.txt @@ -0,0 +1,636 @@ +# BidiMirroring-16.0.0.txt +# Date: 2024-01-30 +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# Bidi_Mirroring_Glyph Property +# +# This file is an informative contributory data file in the +# Unicode Character Database. +# +# This data file lists characters that have the Bidi_Mirrored=Yes property +# value, for which there is another Unicode character that typically has a glyph +# that is the mirror image of the original character's glyph. +# +# The repertoire covered by the file is Unicode 16.0.0. +# +# The file contains a list of lines with mappings from one code point +# to another one for character-based mirroring. +# Note that for "real" mirroring, a rendering engine needs to select +# appropriate alternative glyphs, and that many Unicode characters do not +# have a mirror-image Unicode character. 
+# +# Each mapping line contains two fields, separated by a semicolon (';'). +# Each of the two fields contains a code point represented as a +# variable-length hexadecimal value with 4 to 6 digits. +# A comment indicates where the characters are "BEST FIT" mirroring. +# +# Code points for which Bidi_Mirrored=Yes, but for which no appropriate +# characters exist with mirrored glyphs, are +# listed as comments at the end of the file. +# +# Formally, the default value of the Bidi_Mirroring_Glyph property +# for each code point is , unless a mapping to +# some other character is specified in this data file. When a code +# point has the default value for the Bidi_Mirroring_Glyph property, +# that means that no other character exists whose glyph is suitable +# for character-based mirroring. +# +# For information on bidi mirroring, see UAX #9: Unicode Bidirectional Algorithm, +# at https://www.unicode.org/reports/tr9/ +# +# This file was originally created by Markus Scherer. +# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler, +# and for subsequent versions by Ken Whistler, Laurentiu Iancu, Roozbeh Pournader, +# and Robin Leroy. +# +# Historical and Compatibility Information: +# +# The OpenType Mirroring Pairs List (OMPL) is frozen to match the +# Unicode 5.1 version of the Bidi_Mirroring_Glyph property (2008). +# See https://www.microsoft.com/typography/otspec/ompl.txt +# +# The Unicode 6.1 version of the Bidi_Mirroring_Glyph property (2011) +# added one mirroring pair: 27CB <--> 27CD. +# +# The Unicode 11.0 version of the Bidi_Mirroring_Glyph property (2018) +# underwent a substantial revision, to formally recognize all of the +# exact mirroring pairs and "BEST FIT" mirroring pairs that had been +# added after the freezing of the OMPL list. 
As a result, starting +# with Unicode 11.0, the bmg mapping values more accurately reflect +# the current status of glyphs for Bidi_Mirrored characters in +# the Unicode Standard, but this listing now extends significantly +# beyond the frozen OMPL list. Implementers should be aware of this +# intentional distinction. +# +# ############################################################ +# +# Property: Bidi_Mirroring_Glyph +# +# @missing: 0000..10FFFF; + +0028; 0029 # LEFT PARENTHESIS +0029; 0028 # RIGHT PARENTHESIS +003C; 003E # LESS-THAN SIGN +003E; 003C # GREATER-THAN SIGN +005B; 005D # LEFT SQUARE BRACKET +005D; 005B # RIGHT SQUARE BRACKET +007B; 007D # LEFT CURLY BRACKET +007D; 007B # RIGHT CURLY BRACKET +00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0F3A; 0F3B # TIBETAN MARK GUG RTAGS GYON +0F3B; 0F3A # TIBETAN MARK GUG RTAGS GYAS +0F3C; 0F3D # TIBETAN MARK ANG KHANG GYON +0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS +169B; 169C # OGHAM FEATHER MARK +169C; 169B # OGHAM REVERSED FEATHER MARK +2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2045; 2046 # LEFT SQUARE BRACKET WITH QUILL +2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL +207D; 207E # SUPERSCRIPT LEFT PARENTHESIS +207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS +208D; 208E # SUBSCRIPT LEFT PARENTHESIS +208E; 208D # SUBSCRIPT RIGHT PARENTHESIS +2208; 220B # ELEMENT OF +2209; 220C # [BEST FIT] NOT AN ELEMENT OF +220A; 220D # SMALL ELEMENT OF +220B; 2208 # CONTAINS AS MEMBER +220C; 2209 # [BEST FIT] DOES NOT CONTAIN AS MEMBER +220D; 220A # SMALL CONTAINS AS MEMBER +2215; 29F5 # DIVISION SLASH +221F; 2BFE # RIGHT ANGLE +2220; 29A3 # ANGLE +2221; 299B # MEASURED ANGLE +2222; 29A0 # SPHERICAL ANGLE +2224; 2AEE # DOES NOT DIVIDE +223C; 223D # TILDE OPERATOR +223D; 223C # REVERSED TILDE +2243; 22CD # ASYMPTOTICALLY EQUAL TO +2245; 224C # APPROXIMATELY EQUAL TO +224C; 2245 # ALL EQUAL TO +2252; 
2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO +2254; 2255 # COLON EQUALS +2255; 2254 # EQUALS COLON +2264; 2265 # LESS-THAN OR EQUAL TO +2265; 2264 # GREATER-THAN OR EQUAL TO +2266; 2267 # LESS-THAN OVER EQUAL TO +2267; 2266 # GREATER-THAN OVER EQUAL TO +2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO +2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO +226A; 226B # MUCH LESS-THAN +226B; 226A # MUCH GREATER-THAN +226E; 226F # [BEST FIT] NOT LESS-THAN +226F; 226E # [BEST FIT] NOT GREATER-THAN +2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO +2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO +2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO +2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO +2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO +2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO +2276; 2277 # LESS-THAN OR GREATER-THAN +2277; 2276 # GREATER-THAN OR LESS-THAN +2278; 2279 # [BEST FIT] NEITHER LESS-THAN NOR GREATER-THAN +2279; 2278 # [BEST FIT] NEITHER GREATER-THAN NOR LESS-THAN +227A; 227B # PRECEDES +227B; 227A # SUCCEEDS +227C; 227D # PRECEDES OR EQUAL TO +227D; 227C # SUCCEEDS OR EQUAL TO +227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO +227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO +2280; 2281 # [BEST FIT] DOES NOT PRECEDE +2281; 2280 # [BEST FIT] DOES NOT SUCCEED +2282; 2283 # SUBSET OF +2283; 2282 # SUPERSET OF +2284; 2285 # [BEST FIT] NOT A SUBSET OF +2285; 2284 # [BEST FIT] NOT A SUPERSET OF +2286; 2287 # SUBSET OF OR EQUAL TO +2287; 2286 # SUPERSET OF OR EQUAL TO +2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO +2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO +228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO +228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO +228F; 2290 # SQUARE IMAGE OF +2290; 228F # SQUARE ORIGINAL OF +2291; 2292 # SQUARE IMAGE OF OR EQUAL TO +2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO +2298; 29B8 # 
CIRCLED DIVISION SLASH +22A2; 22A3 # RIGHT TACK +22A3; 22A2 # LEFT TACK +22A6; 2ADE # ASSERTION +22A8; 2AE4 # TRUE +22A9; 2AE3 # FORCES +22AB; 2AE5 # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +22B0; 22B1 # PRECEDES UNDER RELATION +22B1; 22B0 # SUCCEEDS UNDER RELATION +22B2; 22B3 # NORMAL SUBGROUP OF +22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP +22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO +22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6; 22B7 # ORIGINAL OF +22B7; 22B6 # IMAGE OF +22B8; 27DC # MULTIMAP +22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB; 22CC # LEFT SEMIDIRECT PRODUCT +22CC; 22CB # RIGHT SEMIDIRECT PRODUCT +22CD; 2243 # REVERSED TILDE EQUALS +22D0; 22D1 # DOUBLE SUBSET +22D1; 22D0 # DOUBLE SUPERSET +22D6; 22D7 # LESS-THAN WITH DOT +22D7; 22D6 # GREATER-THAN WITH DOT +22D8; 22D9 # VERY MUCH LESS-THAN +22D9; 22D8 # VERY MUCH GREATER-THAN +22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN +22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN +22DC; 22DD # EQUAL TO OR LESS-THAN +22DD; 22DC # EQUAL TO OR GREATER-THAN +22DE; 22DF # EQUAL TO OR PRECEDES +22DF; 22DE # EQUAL TO OR SUCCEEDS +22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL +22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL +22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO +22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO +22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO +22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO +22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO +22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO +22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO +22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO +22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF +22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP +22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO +22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +22F0; 22F1 # UP 
RIGHT DIAGONAL ELLIPSIS +22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS +22F2; 22FA # ELEMENT OF WITH LONG HORIZONTAL STROKE +22F3; 22FB # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22F4; 22FC # SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22F6; 22FD # ELEMENT OF WITH OVERBAR +22F7; 22FE # SMALL ELEMENT OF WITH OVERBAR +22FA; 22F2 # CONTAINS WITH LONG HORIZONTAL STROKE +22FB; 22F3 # CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22FC; 22F4 # SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22FD; 22F6 # CONTAINS WITH OVERBAR +22FE; 22F7 # SMALL CONTAINS WITH OVERBAR +2308; 2309 # LEFT CEILING +2309; 2308 # RIGHT CEILING +230A; 230B # LEFT FLOOR +230B; 230A # RIGHT FLOOR +2329; 232A # LEFT-POINTING ANGLE BRACKET +232A; 2329 # RIGHT-POINTING ANGLE BRACKET +2768; 2769 # MEDIUM LEFT PARENTHESIS ORNAMENT +2769; 2768 # MEDIUM RIGHT PARENTHESIS ORNAMENT +276A; 276B # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B; 276A # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C; 276D # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D; 276C # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E; 276F # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT +2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT +27C3; 27C4 # OPEN SUBSET +27C4; 27C3 # OPEN SUPERSET +27C5; 27C6 # LEFT S-SHAPED BAG DELIMITER +27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER +27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET +27C9; 27C8 # SUPERSET PRECEDING SOLIDUS +27CB; 27CD # MATHEMATICAL RISING DIAGONAL +27CD; 27CB # MATHEMATICAL FALLING DIAGONAL +27D5; 27D6 # LEFT OUTER JOIN +27D6; 27D5 # RIGHT OUTER JOIN +27DC; 22B8 # LEFT 
MULTIMAP +27DD; 27DE # LONG RIGHT TACK +27DE; 27DD # LONG LEFT TACK +27E2; 27E3 # WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK +27E3; 27E2 # WHITE CONCAVE-SIDED DIAMOND WITH RIGHTWARDS TICK +27E4; 27E5 # WHITE SQUARE WITH LEFTWARDS TICK +27E5; 27E4 # WHITE SQUARE WITH RIGHTWARDS TICK +27E6; 27E7 # MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7; 27E6 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8; 27E9 # MATHEMATICAL LEFT ANGLE BRACKET +27E9; 27E8 # MATHEMATICAL RIGHT ANGLE BRACKET +27EA; 27EB # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB; 27EA # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC; 27ED # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED; 27EC # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE; 27EF # MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF; 27EE # MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983; 2984 # LEFT WHITE CURLY BRACKET +2984; 2983 # RIGHT WHITE CURLY BRACKET +2985; 2986 # LEFT WHITE PARENTHESIS +2986; 2985 # RIGHT WHITE PARENTHESIS +2987; 2988 # Z NOTATION LEFT IMAGE BRACKET +2988; 2987 # Z NOTATION RIGHT IMAGE BRACKET +2989; 298A # Z NOTATION LEFT BINDING BRACKET +298A; 2989 # Z NOTATION RIGHT BINDING BRACKET +298B; 298C # LEFT SQUARE BRACKET WITH UNDERBAR +298C; 298B # RIGHT SQUARE BRACKET WITH UNDERBAR +298D; 2990 # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E; 298F # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F; 298E # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990; 298D # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991; 2992 # LEFT ANGLE BRACKET WITH DOT +2992; 2991 # RIGHT ANGLE BRACKET WITH DOT +2993; 2994 # LEFT ARC LESS-THAN BRACKET +2994; 2993 # RIGHT ARC GREATER-THAN BRACKET +2995; 2996 # DOUBLE LEFT ARC GREATER-THAN BRACKET +2996; 2995 # DOUBLE RIGHT ARC LESS-THAN BRACKET +2997; 2998 # LEFT BLACK TORTOISE SHELL BRACKET +2998; 2997 # RIGHT BLACK TORTOISE SHELL BRACKET +299B; 2221 # MEASURED ANGLE OPENING LEFT +29A0; 2222 # SPHERICAL ANGLE OPENING LEFT +29A3; 2220 # REVERSED ANGLE +29A4; 29A5 # 
ANGLE WITH UNDERBAR +29A5; 29A4 # REVERSED ANGLE WITH UNDERBAR +29A8; 29A9 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT +29A9; 29A8 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT +29AA; 29AB # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT +29AB; 29AA # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT +29AC; 29AD # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP +29AD; 29AC # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP +29AE; 29AF # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN +29AF; 29AE # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN +29B8; 2298 # CIRCLED REVERSE SOLIDUS +29C0; 29C1 # CIRCLED LESS-THAN +29C1; 29C0 # CIRCLED GREATER-THAN +29C4; 29C5 # SQUARED RISING DIAGONAL SLASH +29C5; 29C4 # SQUARED FALLING DIAGONAL SLASH +29CF; 29D0 # LEFT TRIANGLE BESIDE VERTICAL BAR +29D0; 29CF # VERTICAL BAR BESIDE RIGHT TRIANGLE +29D1; 29D2 # BOWTIE WITH LEFT HALF BLACK +29D2; 29D1 # BOWTIE WITH RIGHT HALF BLACK +29D4; 29D5 # TIMES WITH LEFT HALF BLACK +29D5; 29D4 # TIMES WITH RIGHT HALF BLACK +29D8; 29D9 # LEFT WIGGLY FENCE +29D9; 29D8 # RIGHT WIGGLY FENCE +29DA; 29DB # LEFT DOUBLE WIGGLY FENCE +29DB; 29DA # RIGHT DOUBLE WIGGLY FENCE +29E8; 29E9 # DOWN-POINTING TRIANGLE WITH LEFT HALF BLACK +29E9; 29E8 # DOWN-POINTING TRIANGLE WITH RIGHT HALF BLACK +29F5; 2215 # REVERSE SOLIDUS OPERATOR +29F8; 29F9 # BIG SOLIDUS +29F9; 29F8 # BIG REVERSE SOLIDUS +29FC; 29FD # LEFT-POINTING CURVED ANGLE BRACKET +29FD; 29FC # RIGHT-POINTING CURVED ANGLE BRACKET +2A2B; 2A2C # MINUS SIGN WITH FALLING DOTS +2A2C; 2A2B # MINUS SIGN WITH RISING DOTS +2A2D; 2A2E # PLUS SIGN IN LEFT HALF CIRCLE +2A2E; 2A2D # PLUS SIGN IN RIGHT HALF CIRCLE +2A34; 2A35 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE +2A35; 2A34 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE +2A3C; 2A3D # INTERIOR PRODUCT +2A3D; 2A3C # RIGHTHAND INTERIOR PRODUCT +2A64; 2A65 
# Z NOTATION DOMAIN ANTIRESTRICTION +2A65; 2A64 # Z NOTATION RANGE ANTIRESTRICTION +2A79; 2A7A # LESS-THAN WITH CIRCLE INSIDE +2A7A; 2A79 # GREATER-THAN WITH CIRCLE INSIDE +2A7B; 2A7C # [BEST FIT] LESS-THAN WITH QUESTION MARK ABOVE +2A7C; 2A7B # [BEST FIT] GREATER-THAN WITH QUESTION MARK ABOVE +2A7D; 2A7E # LESS-THAN OR SLANTED EQUAL TO +2A7E; 2A7D # GREATER-THAN OR SLANTED EQUAL TO +2A7F; 2A80 # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE +2A80; 2A7F # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE +2A81; 2A82 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE +2A82; 2A81 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE +2A83; 2A84 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT +2A84; 2A83 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT +2A85; 2A86 # [BEST FIT] LESS-THAN OR APPROXIMATE +2A86; 2A85 # [BEST FIT] GREATER-THAN OR APPROXIMATE +2A87; 2A88 # [BEST FIT] LESS-THAN AND SINGLE-LINE NOT EQUAL TO +2A88; 2A87 # [BEST FIT] GREATER-THAN AND SINGLE-LINE NOT EQUAL TO +2A89; 2A8A # [BEST FIT] LESS-THAN AND NOT APPROXIMATE +2A8A; 2A89 # [BEST FIT] GREATER-THAN AND NOT APPROXIMATE +2A8B; 2A8C # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN +2A8C; 2A8B # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN +2A8D; 2A8E # [BEST FIT] LESS-THAN ABOVE SIMILAR OR EQUAL +2A8E; 2A8D # [BEST FIT] GREATER-THAN ABOVE SIMILAR OR EQUAL +2A8F; 2A90 # [BEST FIT] LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN +2A90; 2A8F # [BEST FIT] GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN +2A91; 2A92 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL +2A92; 2A91 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL +2A93; 2A94 # LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL +2A94; 2A93 # GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL +2A95; 2A96 # SLANTED EQUAL TO OR LESS-THAN +2A96; 2A95 # SLANTED EQUAL TO OR GREATER-THAN +2A97; 2A98 # SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE +2A98; 2A97 # SLANTED EQUAL TO OR GREATER-THAN 
WITH DOT INSIDE +2A99; 2A9A # DOUBLE-LINE EQUAL TO OR LESS-THAN +2A9A; 2A99 # DOUBLE-LINE EQUAL TO OR GREATER-THAN +2A9B; 2A9C # DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN +2A9C; 2A9B # DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN +2A9D; 2A9E # [BEST FIT] SIMILAR OR LESS-THAN +2A9E; 2A9D # [BEST FIT] SIMILAR OR GREATER-THAN +2A9F; 2AA0 # [BEST FIT] SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN +2AA0; 2A9F # [BEST FIT] SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN +2AA1; 2AA2 # DOUBLE NESTED LESS-THAN +2AA2; 2AA1 # DOUBLE NESTED GREATER-THAN +2AA6; 2AA7 # LESS-THAN CLOSED BY CURVE +2AA7; 2AA6 # GREATER-THAN CLOSED BY CURVE +2AA8; 2AA9 # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL +2AA9; 2AA8 # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL +2AAA; 2AAB # SMALLER THAN +2AAB; 2AAA # LARGER THAN +2AAC; 2AAD # SMALLER THAN OR EQUAL TO +2AAD; 2AAC # LARGER THAN OR EQUAL TO +2AAF; 2AB0 # PRECEDES ABOVE SINGLE-LINE EQUALS SIGN +2AB0; 2AAF # SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN +2AB1; 2AB2 # [BEST FIT] PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO +2AB2; 2AB1 # [BEST FIT] SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO +2AB3; 2AB4 # PRECEDES ABOVE EQUALS SIGN +2AB4; 2AB3 # SUCCEEDS ABOVE EQUALS SIGN +2AB5; 2AB6 # [BEST FIT] PRECEDES ABOVE NOT EQUAL TO +2AB6; 2AB5 # [BEST FIT] SUCCEEDS ABOVE NOT EQUAL TO +2AB7; 2AB8 # [BEST FIT] PRECEDES ABOVE ALMOST EQUAL TO +2AB8; 2AB7 # [BEST FIT] SUCCEEDS ABOVE ALMOST EQUAL TO +2AB9; 2ABA # [BEST FIT] PRECEDES ABOVE NOT ALMOST EQUAL TO +2ABA; 2AB9 # [BEST FIT] SUCCEEDS ABOVE NOT ALMOST EQUAL TO +2ABB; 2ABC # DOUBLE PRECEDES +2ABC; 2ABB # DOUBLE SUCCEEDS +2ABD; 2ABE # SUBSET WITH DOT +2ABE; 2ABD # SUPERSET WITH DOT +2ABF; 2AC0 # SUBSET WITH PLUS SIGN BELOW +2AC0; 2ABF # SUPERSET WITH PLUS SIGN BELOW +2AC1; 2AC2 # SUBSET WITH MULTIPLICATION SIGN BELOW +2AC2; 2AC1 # SUPERSET WITH MULTIPLICATION SIGN BELOW +2AC3; 2AC4 # SUBSET OF OR EQUAL TO WITH DOT ABOVE +2AC4; 2AC3 # SUPERSET OF OR EQUAL TO WITH DOT ABOVE +2AC5; 2AC6 # SUBSET OF ABOVE EQUALS SIGN +2AC6; 
2AC5 # SUPERSET OF ABOVE EQUALS SIGN +2AC7; 2AC8 # [BEST FIT] SUBSET OF ABOVE TILDE OPERATOR +2AC8; 2AC7 # [BEST FIT] SUPERSET OF ABOVE TILDE OPERATOR +2AC9; 2ACA # [BEST FIT] SUBSET OF ABOVE ALMOST EQUAL TO +2ACA; 2AC9 # [BEST FIT] SUPERSET OF ABOVE ALMOST EQUAL TO +2ACB; 2ACC # [BEST FIT] SUBSET OF ABOVE NOT EQUAL TO +2ACC; 2ACB # [BEST FIT] SUPERSET OF ABOVE NOT EQUAL TO +2ACD; 2ACE # SQUARE LEFT OPEN BOX OPERATOR +2ACE; 2ACD # SQUARE RIGHT OPEN BOX OPERATOR +2ACF; 2AD0 # CLOSED SUBSET +2AD0; 2ACF # CLOSED SUPERSET +2AD1; 2AD2 # CLOSED SUBSET OR EQUAL TO +2AD2; 2AD1 # CLOSED SUPERSET OR EQUAL TO +2AD3; 2AD4 # SUBSET ABOVE SUPERSET +2AD4; 2AD3 # SUPERSET ABOVE SUBSET +2AD5; 2AD6 # SUBSET ABOVE SUBSET +2AD6; 2AD5 # SUPERSET ABOVE SUPERSET +2ADE; 22A6 # SHORT LEFT TACK +2AE3; 22A9 # DOUBLE VERTICAL BAR LEFT TURNSTILE +2AE4; 22A8 # VERTICAL BAR DOUBLE LEFT TURNSTILE +2AE5; 22AB # DOUBLE VERTICAL BAR DOUBLE LEFT TURNSTILE +2AEC; 2AED # DOUBLE STROKE NOT SIGN +2AED; 2AEC # REVERSED DOUBLE STROKE NOT SIGN +2AEE; 2224 # DOES NOT DIVIDE WITH REVERSED NEGATION SLASH +2AF7; 2AF8 # TRIPLE NESTED LESS-THAN +2AF8; 2AF7 # TRIPLE NESTED GREATER-THAN +2AF9; 2AFA # DOUBLE-LINE SLANTED LESS-THAN OR EQUAL TO +2AFA; 2AF9 # DOUBLE-LINE SLANTED GREATER-THAN OR EQUAL TO +2BFE; 221F # REVERSED RIGHT ANGLE +2E02; 2E03 # LEFT SUBSTITUTION BRACKET +2E03; 2E02 # RIGHT SUBSTITUTION BRACKET +2E04; 2E05 # LEFT DOTTED SUBSTITUTION BRACKET +2E05; 2E04 # RIGHT DOTTED SUBSTITUTION BRACKET +2E09; 2E0A # LEFT TRANSPOSITION BRACKET +2E0A; 2E09 # RIGHT TRANSPOSITION BRACKET +2E0C; 2E0D # LEFT RAISED OMISSION BRACKET +2E0D; 2E0C # RIGHT RAISED OMISSION BRACKET +2E1C; 2E1D # LEFT LOW PARAPHRASE BRACKET +2E1D; 2E1C # RIGHT LOW PARAPHRASE BRACKET +2E20; 2E21 # LEFT VERTICAL BAR WITH QUILL +2E21; 2E20 # RIGHT VERTICAL BAR WITH QUILL +2E22; 2E23 # TOP LEFT HALF BRACKET +2E23; 2E22 # TOP RIGHT HALF BRACKET +2E24; 2E25 # BOTTOM LEFT HALF BRACKET +2E25; 2E24 # BOTTOM RIGHT HALF BRACKET +2E26; 2E27 # LEFT 
SIDEWAYS U BRACKET +2E27; 2E26 # RIGHT SIDEWAYS U BRACKET +2E28; 2E29 # LEFT DOUBLE PARENTHESIS +2E29; 2E28 # RIGHT DOUBLE PARENTHESIS +2E55; 2E56 # LEFT SQUARE BRACKET WITH STROKE +2E56; 2E55 # RIGHT SQUARE BRACKET WITH STROKE +2E57; 2E58 # LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58; 2E57 # RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59; 2E5A # TOP HALF LEFT PARENTHESIS +2E5A; 2E59 # TOP HALF RIGHT PARENTHESIS +2E5B; 2E5C # BOTTOM HALF LEFT PARENTHESIS +2E5C; 2E5B # BOTTOM HALF RIGHT PARENTHESIS +3008; 3009 # LEFT ANGLE BRACKET +3009; 3008 # RIGHT ANGLE BRACKET +300A; 300B # LEFT DOUBLE ANGLE BRACKET +300B; 300A # RIGHT DOUBLE ANGLE BRACKET +300C; 300D # [BEST FIT] LEFT CORNER BRACKET +300D; 300C # [BEST FIT] RIGHT CORNER BRACKET +300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET +300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET +3010; 3011 # LEFT BLACK LENTICULAR BRACKET +3011; 3010 # RIGHT BLACK LENTICULAR BRACKET +3014; 3015 # LEFT TORTOISE SHELL BRACKET +3015; 3014 # RIGHT TORTOISE SHELL BRACKET +3016; 3017 # LEFT WHITE LENTICULAR BRACKET +3017; 3016 # RIGHT WHITE LENTICULAR BRACKET +3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET +3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET +301A; 301B # LEFT WHITE SQUARE BRACKET +301B; 301A # RIGHT WHITE SQUARE BRACKET +FE59; FE5A # SMALL LEFT PARENTHESIS +FE5A; FE59 # SMALL RIGHT PARENTHESIS +FE5B; FE5C # SMALL LEFT CURLY BRACKET +FE5C; FE5B # SMALL RIGHT CURLY BRACKET +FE5D; FE5E # SMALL LEFT TORTOISE SHELL BRACKET +FE5E; FE5D # SMALL RIGHT TORTOISE SHELL BRACKET +FE64; FE65 # SMALL LESS-THAN SIGN +FE65; FE64 # SMALL GREATER-THAN SIGN +FF08; FF09 # FULLWIDTH LEFT PARENTHESIS +FF09; FF08 # FULLWIDTH RIGHT PARENTHESIS +FF1C; FF1E # FULLWIDTH LESS-THAN SIGN +FF1E; FF1C # FULLWIDTH GREATER-THAN SIGN +FF3B; FF3D # FULLWIDTH LEFT SQUARE BRACKET +FF3D; FF3B # FULLWIDTH RIGHT SQUARE BRACKET +FF5B; FF5D # FULLWIDTH LEFT CURLY BRACKET +FF5D; FF5B # FULLWIDTH RIGHT CURLY BRACKET +FF5F; FF60 # FULLWIDTH LEFT WHITE PARENTHESIS 
+FF60; FF5F # FULLWIDTH RIGHT WHITE PARENTHESIS +FF62; FF63 # [BEST FIT] HALFWIDTH LEFT CORNER BRACKET +FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET + +# The following characters have no appropriate mirroring character. +# For these characters it is up to the rendering system +# to provide mirrored glyphs. + +# 2140; DOUBLE-STRUCK N-ARY SUMMATION +# 2201; COMPLEMENT +# 2202; PARTIAL DIFFERENTIAL +# 2203; THERE EXISTS +# 2204; THERE DOES NOT EXIST +# 2211; N-ARY SUMMATION +# 2216; SET MINUS +# 221A; SQUARE ROOT +# 221B; CUBE ROOT +# 221C; FOURTH ROOT +# 221D; PROPORTIONAL TO +# 2226; NOT PARALLEL TO +# 222B; INTEGRAL +# 222C; DOUBLE INTEGRAL +# 222D; TRIPLE INTEGRAL +# 222E; CONTOUR INTEGRAL +# 222F; SURFACE INTEGRAL +# 2230; VOLUME INTEGRAL +# 2231; CLOCKWISE INTEGRAL +# 2232; CLOCKWISE CONTOUR INTEGRAL +# 2233; ANTICLOCKWISE CONTOUR INTEGRAL +# 2239; EXCESS +# 223B; HOMOTHETIC +# 223E; INVERTED LAZY S +# 223F; SINE WAVE +# 2240; WREATH PRODUCT +# 2241; NOT TILDE +# 2242; MINUS TILDE +# 2244; NOT ASYMPTOTICALLY EQUAL TO +# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +# 2248; ALMOST EQUAL TO +# 2249; NOT ALMOST EQUAL TO +# 224A; ALMOST EQUAL OR EQUAL TO +# 224B; TRIPLE TILDE +# 225F; QUESTIONED EQUAL TO +# 2260; NOT EQUAL TO +# 2262; NOT IDENTICAL TO +# 226D; NOT EQUIVALENT TO +# 228C; MULTISET +# 22A7; MODELS +# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE +# 22AC; DOES NOT PROVE +# 22AD; NOT TRUE +# 22AE; DOES NOT FORCE +# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22BE; RIGHT ANGLE WITH ARC +# 22BF; RIGHT TRIANGLE +# 22F5; ELEMENT OF WITH DOT ABOVE +# 22F8; ELEMENT OF WITH UNDERBAR +# 22F9; ELEMENT OF WITH TWO HORIZONTAL STROKES +# 22FF; Z NOTATION BAG MEMBERSHIP +# 2320; TOP HALF INTEGRAL +# 2321; BOTTOM HALF INTEGRAL +# 27C0; THREE DIMENSIONAL ANGLE +# 27CC; LONG DIVISION +# 27D3; LOWER RIGHT CORNER WITH DOT +# 27D4; UPPER LEFT CORNER WITH DOT +# 299C; RIGHT ANGLE VARIANT WITH SQUARE 
+# 299D; MEASURED RIGHT ANGLE WITH DOT +# 299E; ANGLE WITH S INSIDE +# 299F; ACUTE ANGLE +# 29A2; TURNED ANGLE +# 29A6; OBLIQUE ANGLE OPENING UP +# 29A7; OBLIQUE ANGLE OPENING DOWN +# 29C2; CIRCLE WITH SMALL CIRCLE TO THE RIGHT +# 29C3; CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT +# 29C9; TWO JOINED SQUARES +# 29CE; RIGHT TRIANGLE ABOVE LEFT TRIANGLE +# 29DC; INCOMPLETE INFINITY +# 29E1; INCREASES AS +# 29E3; EQUALS SIGN AND SLANTED PARALLEL +# 29E4; EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE +# 29E5; IDENTICAL TO AND SLANTED PARALLEL +# 29F4; RULE-DELAYED +# 29F6; SOLIDUS WITH OVERBAR +# 29F7; REVERSE SOLIDUS WITH HORIZONTAL STROKE +# 2A0A; MODULO TWO SUM +# 2A0B; SUMMATION WITH INTEGRAL +# 2A0C; QUADRUPLE INTEGRAL OPERATOR +# 2A0D; FINITE PART INTEGRAL +# 2A0E; INTEGRAL WITH DOUBLE STROKE +# 2A0F; INTEGRAL AVERAGE WITH SLASH +# 2A10; CIRCULATION FUNCTION +# 2A11; ANTICLOCKWISE INTEGRATION +# 2A12; LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE +# 2A13; LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE +# 2A14; LINE INTEGRATION NOT INCLUDING THE POLE +# 2A15; INTEGRAL AROUND A POINT OPERATOR +# 2A16; QUATERNION INTEGRAL OPERATOR +# 2A17; INTEGRAL WITH LEFTWARDS ARROW WITH HOOK +# 2A18; INTEGRAL WITH TIMES SIGN +# 2A19; INTEGRAL WITH INTERSECTION +# 2A1A; INTEGRAL WITH UNION +# 2A1B; INTEGRAL WITH OVERBAR +# 2A1C; INTEGRAL WITH UNDERBAR +# 2A1E; LARGE LEFT TRIANGLE OPERATOR +# 2A1F; Z NOTATION SCHEMA COMPOSITION +# 2A20; Z NOTATION SCHEMA PIPING +# 2A21; Z NOTATION SCHEMA PROJECTION +# 2A24; PLUS SIGN WITH TILDE ABOVE +# 2A26; PLUS SIGN WITH TILDE BELOW +# 2A29; MINUS SIGN WITH COMMA ABOVE +# 2A3E; Z NOTATION RELATIONAL COMPOSITION +# 2A57; SLOPING LARGE OR +# 2A58; SLOPING LARGE AND +# 2A6A; TILDE OPERATOR WITH DOT ABOVE +# 2A6B; TILDE OPERATOR WITH RISING DOTS +# 2A6C; SIMILAR MINUS SIMILAR +# 2A6D; CONGRUENT WITH DOT ABOVE +# 2A6F; ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT +# 2A70; APPROXIMATELY EQUAL OR EQUAL TO +# 2A73; EQUALS SIGN ABOVE TILDE 
OPERATOR +# 2A74; DOUBLE COLON EQUAL +# 2AA3; DOUBLE NESTED LESS-THAN WITH UNDERBAR +# 2ADC; FORKING +# 2AE2; VERTICAL BAR TRIPLE RIGHT TURNSTILE +# 2AE6; LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL +# 2AF3; PARALLEL WITH TILDE OPERATOR +# 2AFB; TRIPLE SOLIDUS BINARY RELATION +# 2AFD; DOUBLE SOLIDUS OPERATOR +# 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +# 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +# 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +# 1D789; MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +# 1D7C3; MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/CaseFolding.txt b/3rd/pcre2/maint/Unicode.tables/CaseFolding.txt new file mode 100644 index 00000000..1b7a9c15 --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/CaseFolding.txt @@ -0,0 +1,1654 @@ +# CaseFolding-16.0.0.txt +# Date: 2024-04-30, 21:48:11 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. 
+# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# ; ; ; # +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. 
+ +# ================================================================= +0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X +0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH 
CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN 
CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U 
WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL 
LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O 
WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; 
C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F; C; 03F3; # GREEK CAPITAL LETTER YOT +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; 
C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC 
CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS 
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 
04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500; 
C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE +052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE +052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # 
ARMENIAN CAPITAL LETTER JA +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN 
CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN +13F8; C; 13F0; # CHEROKEE SMALL LETTER YE +13F9; C; 13F1; # CHEROKEE SMALL LETTER YI +13FA; C; 13F2; # CHEROKEE SMALL LETTER YO +13FB; C; 13F3; # CHEROKEE SMALL LETTER YU +13FC; C; 13F4; # CHEROKEE SMALL LETTER YV +13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE +1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE +1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O +1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES +1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE +1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE +1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN +1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT +1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C89; C; 1C8A; # CYRILLIC CAPITAL LETTER TJE +1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN +1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN +1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN +1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON +1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN +1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN +1C96; 
C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN +1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN +1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN +1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN +1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS +1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN +1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR +1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON +1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR +1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR +1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE +1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN +1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR +1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN +1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR +1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR +1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN +1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR +1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN +1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN +1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN +1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL +1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL +1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR +1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN +1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN +1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE +1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE +1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE +1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE +1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR +1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE +1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI +1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN +1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI +1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN +1CBA; C; 10FA; 
# GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN +1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN +1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL 
LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; 
C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH 
TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 
1EFF; # LATIN CAPITAL LETTER Y WITH LOOP +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER 
OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND 
PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; 
F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND 
OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL 
LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 0342; # GREEK SMALL LETTER 
UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # 
CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL 
LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH 
DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC 
CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; 
C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O +A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN 
CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH +A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE +A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE +A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE +A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; 
A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK +A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E +A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G +A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K +A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T +A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL +A7B3; C; AB53; # LATIN CAPITAL LETTER CHI +A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA +A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE +A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A +A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I +A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U +A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O +A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W +A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK +A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK +A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK +A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN +A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S +A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA +A7DC; C; 019B; # LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H +AB70; C; 13A0; # CHEROKEE SMALL LETTER A +AB71; C; 13A1; # CHEROKEE SMALL LETTER E +AB72; C; 13A2; # CHEROKEE SMALL LETTER I +AB73; C; 13A3; # CHEROKEE SMALL LETTER O +AB74; C; 13A4; 
# CHEROKEE SMALL LETTER U +AB75; C; 13A5; # CHEROKEE SMALL LETTER V +AB76; C; 13A6; # CHEROKEE SMALL LETTER GA +AB77; C; 13A7; # CHEROKEE SMALL LETTER KA +AB78; C; 13A8; # CHEROKEE SMALL LETTER GE +AB79; C; 13A9; # CHEROKEE SMALL LETTER GI +AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO +AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU +AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV +AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA +AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE +AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI +AB80; C; 13B0; # CHEROKEE SMALL LETTER HO +AB81; C; 13B1; # CHEROKEE SMALL LETTER HU +AB82; C; 13B2; # CHEROKEE SMALL LETTER HV +AB83; C; 13B3; # CHEROKEE SMALL LETTER LA +AB84; C; 13B4; # CHEROKEE SMALL LETTER LE +AB85; C; 13B5; # CHEROKEE SMALL LETTER LI +AB86; C; 13B6; # CHEROKEE SMALL LETTER LO +AB87; C; 13B7; # CHEROKEE SMALL LETTER LU +AB88; C; 13B8; # CHEROKEE SMALL LETTER LV +AB89; C; 13B9; # CHEROKEE SMALL LETTER MA +AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME +AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI +AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO +AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU +AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA +AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA +AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH +AB91; C; 13C1; # CHEROKEE SMALL LETTER NE +AB92; C; 13C2; # CHEROKEE SMALL LETTER NI +AB93; C; 13C3; # CHEROKEE SMALL LETTER NO +AB94; C; 13C4; # CHEROKEE SMALL LETTER NU +AB95; C; 13C5; # CHEROKEE SMALL LETTER NV +AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA +AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE +AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI +AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO +AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU +AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV +AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA +AB9D; C; 13CD; # CHEROKEE SMALL LETTER S +AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE +AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI +ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO +ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU +ABA2; C; 13D2; # CHEROKEE SMALL 
LETTER SV +ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA +ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA +ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE +ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE +ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI +ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI +ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO +ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU +ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV +ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA +ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA +ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE +ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI +ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO +ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU +ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV +ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA +ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE +ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI +ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO +ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU +ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV +ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA +ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE +ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI +ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO +ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU +ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV +ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; 
C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET 
CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE +10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET CAPITAL LETTER EW +104B0; C; 104D8; # OSAGE CAPITAL LETTER A +104B1; C; 104D9; # OSAGE CAPITAL LETTER AI +104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN +104B3; C; 104DB; # OSAGE CAPITAL LETTER AH +104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA +104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA +104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA +104B7; C; 104DF; # OSAGE CAPITAL LETTER E +104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN +104B9; C; 104E1; # OSAGE CAPITAL LETTER HA +104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA +104BB; C; 104E3; # OSAGE CAPITAL LETTER I +104BC; C; 104E4; # OSAGE CAPITAL LETTER KA +104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA +104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA +104BF; C; 104E7; # OSAGE CAPITAL LETTER LA +104C0; C; 104E8; # OSAGE CAPITAL LETTER MA +104C1; C; 104E9; # OSAGE CAPITAL LETTER NA +104C2; C; 
104EA; # OSAGE CAPITAL LETTER O +104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN +104C4; C; 104EC; # OSAGE CAPITAL LETTER PA +104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA +104C6; C; 104EE; # OSAGE CAPITAL LETTER SA +104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA +104C8; C; 104F0; # OSAGE CAPITAL LETTER TA +104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA +104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA +104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA +104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA +104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA +104CE; C; 104F6; # OSAGE CAPITAL LETTER U +104CF; C; 104F7; # OSAGE CAPITAL LETTER WA +104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA +104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA +104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA +104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA +10570; C; 10597; # VITHKUQI CAPITAL LETTER A +10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE +10572; C; 10599; # VITHKUQI CAPITAL LETTER BE +10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE +10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE +10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE +10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE +10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI +10578; C; 1059F; # VITHKUQI CAPITAL LETTER E +10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE +1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA +1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA +1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA +1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I +1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE +10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE +10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA +10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA +10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA +10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME +10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE +10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE +10587; C; 105AE; # VITHKUQI CAPITAL LETTER O +10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE +10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA +1058A; C; 105B1; # 
VITHKUQI CAPITAL LETTER RE +1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE +1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE +1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE +1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE +10590; C; 105B7; # VITHKUQI CAPITAL LETTER U +10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE +10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE +10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y +10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE +10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A +10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA +10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB +10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB +10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC +10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC +10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS +10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED +10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND +10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E +10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E +10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE +10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF +10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG +10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY +10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH +10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I +10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II +10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ +10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK +10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK +10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK +10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL +10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY +10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM +10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN +10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY +10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O +10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO +10C9D; C; 10CDD; # OLD 
HUNGARIAN CAPITAL LETTER NIKOLSBURG OE +10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE +10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE +10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP +10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP +10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER +10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER +10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES +10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ +10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET +10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT +10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY +10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH +10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U +10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU +10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE +10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE +10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV +10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ +10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS +10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN +10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +10D50; C; 10D70; # GARAY CAPITAL LETTER A +10D51; C; 10D71; # GARAY CAPITAL LETTER CA +10D52; C; 10D72; # GARAY CAPITAL LETTER MA +10D53; C; 10D73; # GARAY CAPITAL LETTER KA +10D54; C; 10D74; # GARAY CAPITAL LETTER BA +10D55; C; 10D75; # GARAY CAPITAL LETTER JA +10D56; C; 10D76; # GARAY CAPITAL LETTER SA +10D57; C; 10D77; # GARAY CAPITAL LETTER WA +10D58; C; 10D78; # GARAY CAPITAL LETTER LA +10D59; C; 10D79; # GARAY CAPITAL LETTER GA +10D5A; C; 10D7A; # GARAY CAPITAL LETTER DA +10D5B; C; 10D7B; # GARAY CAPITAL LETTER XA +10D5C; C; 10D7C; # GARAY CAPITAL LETTER YA +10D5D; C; 10D7D; # GARAY CAPITAL LETTER TA +10D5E; C; 10D7E; # GARAY CAPITAL LETTER RA +10D5F; C; 10D7F; # GARAY CAPITAL LETTER NYA +10D60; C; 10D80; # GARAY CAPITAL LETTER FA +10D61; C; 10D81; # GARAY CAPITAL LETTER NA +10D62; C; 10D82; # 
GARAY CAPITAL LETTER PA +10D63; C; 10D83; # GARAY CAPITAL LETTER HA +10D64; C; 10D84; # GARAY CAPITAL LETTER OLD KA +10D65; C; 10D85; # GARAY CAPITAL LETTER OLD NA +118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA +118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A +118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI +118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU +118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA +118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO +118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II +118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU +118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E +118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O +118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG +118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA +118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO +118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY +118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ +118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC +118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN +118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD +118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE +118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG +118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA +118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT +118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM +118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU +118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU +118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO +118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO +118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR +118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR +118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU +118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII +118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M +16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S +16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V +16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W +16E44; C; 16E64; # 
MEDEFAIDRIN CAPITAL LETTER ATIU +16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z +16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP +16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P +16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T +16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G +16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F +16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I +16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K +16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A +16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J +16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E +16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B +16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C +16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U +16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU +16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L +16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q +16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP +16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY +16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X +16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D +16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE +16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N +16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R +16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O +16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI +16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y +1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF +1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI +1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM +1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM +1E904; C; 1E926; # ADLAM CAPITAL LETTER BA +1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE +1E906; C; 1E928; # ADLAM CAPITAL LETTER PE +1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE +1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA +1E909; C; 1E92B; # ADLAM CAPITAL LETTER E +1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA +1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I +1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O +1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA 
+1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE +1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW +1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN +1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF +1E912; C; 1E934; # ADLAM CAPITAL LETTER YA +1E913; C; 1E935; # ADLAM CAPITAL LETTER U +1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM +1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI +1E916; C; 1E938; # ADLAM CAPITAL LETTER HA +1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF +1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA +1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA +1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU +1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA +1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA +1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA +1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE +1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL +1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO +1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA +# +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/DerivedBidiClass.txt b/3rd/pcre2/maint/Unicode.tables/DerivedBidiClass.txt new file mode 100644 index 00000000..2aceac0a --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/DerivedBidiClass.txt @@ -0,0 +1,2579 @@ +# DerivedBidiClass-16.0.0.txt +# Date: 2024-04-30, 21:48:13 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Bidi Class (listing UnicodeData.txt, field 4: see UAX #44: https://www.unicode.org/reports/tr44/) +# Unlike other properties, unassigned code points in blocks +# reserved for right-to-left scripts are given either values R or AL, +# and unassigned code points in the Currency Symbols block are given the value ET. +# For details see the @missing lines below. 
+# +# The unassigned code points that default to BN have one of the following properties: +# Default_Ignorable_Code_Point +# Noncharacter_Code_Point +# +# For all other cases: + +# All code points not explicitly listed for Bidi_Class +# have the value Left_To_Right (L). + +# @missing: 0000..10FFFF; Left_To_Right + +# 0590..05FF Hebrew +# @missing: 0590..05FF; Right_To_Left + +# 0600..06FF Arabic +# 0700..074F Syriac +# 0750..077F Arabic_Supplement +# 0780..07BF Thaana +# @missing: 0600..07BF; Arabic_Letter + +# 07C0..07FF NKo +# 0800..083F Samaritan +# 0840..085F Mandaic +# @missing: 07C0..085F; Right_To_Left + +# 0860..086F Syriac_Supplement +# 0870..089F Arabic_Extended_B +# 08A0..08FF Arabic_Extended_A +# @missing: 0860..08FF; Arabic_Letter + +# 20A0..20CF Currency_Symbols +# @missing: 20A0..20CF; European_Terminator + +# FB00..FB4F Alphabetic_Presentation_Forms (partial) +# @missing: FB1D..FB4F; Right_To_Left + +# FB50..FDFF Arabic_Presentation_Forms_A (partial) +# @missing: FB50..FDCF; Arabic_Letter + +# FB50..FDFF Arabic_Presentation_Forms_A (partial) +# @missing: FDF0..FDFF; Arabic_Letter + +# FE70..FEFF Arabic_Presentation_Forms_B +# @missing: FE70..FEFF; Arabic_Letter + +# 10800..1083F Cypriot_Syllabary +# 10840..1085F Imperial_Aramaic +# 10860..1087F Palmyrene +# 10880..108AF Nabataean +# 108E0..108FF Hatran +# 10900..1091F Phoenician +# 10920..1093F Lydian +# 10980..1099F Meroitic_Hieroglyphs +# 109A0..109FF Meroitic_Cursive +# 10A00..10A5F Kharoshthi +# 10A60..10A7F Old_South_Arabian +# 10A80..10A9F Old_North_Arabian +# 10AC0..10AFF Manichaean +# 10B00..10B3F Avestan +# 10B40..10B5F Inscriptional_Parthian +# 10B60..10B7F Inscriptional_Pahlavi +# 10B80..10BAF Psalter_Pahlavi +# 10C00..10C4F Old_Turkic +# 10C80..10CFF Old_Hungarian +# @missing: 10800..10CFF; Right_To_Left + +# 10D00..10D3F Hanifi_Rohingya +# @missing: 10D00..10D3F; Arabic_Letter + +# 10D40..10D8F Garay +# 10E60..10E7F Rumi_Numeral_Symbols +# 10E80..10EBF Yezidi +# @missing: 10D40..10EBF; 
Right_To_Left + +# 10EC0..10EFF Arabic_Extended_C +# @missing: 10EC0..10EFF; Arabic_Letter + +# 10F00..10F2F Old_Sogdian +# @missing: 10F00..10F2F; Right_To_Left + +# 10F30..10F6F Sogdian +# @missing: 10F30..10F6F; Arabic_Letter + +# 10F70..10FAF Old_Uyghur +# 10FB0..10FDF Chorasmian +# 10FE0..10FFF Elymaic +# @missing: 10F70..10FFF; Right_To_Left + +# 1E800..1E8DF Mende_Kikakui +# 1E900..1E95F Adlam +# @missing: 1E800..1EC6F; Right_To_Left + +# 1EC70..1ECBF Indic_Siyaq_Numbers +# @missing: 1EC70..1ECBF; Arabic_Letter + +# @missing: 1ECC0..1ECFF; Right_To_Left + +# 1ED00..1ED4F Ottoman_Siyaq_Numbers +# @missing: 1ED00..1ED4F; Arabic_Letter + +# @missing: 1ED50..1EDFF; Right_To_Left + +# 1EE00..1EEFF Arabic_Mathematical_Alphabetic_Symbols +# @missing: 1EE00..1EEFF; Arabic_Letter + +# @missing: 1EF00..1EFFF; Right_To_Left + +# ================================================ + +# Bidi_Class=Left_To_Right + +0041..005A ; L # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; L # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; L # Lo FEMININE ORDINAL INDICATOR +00B5 ; L # L& MICRO SIGN +00BA ; L # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; L # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; L # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; L # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; L # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; L # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; L # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 
+02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP +02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; L # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EE ; L # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; L # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; L # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; L # Lm GREEK YPOGEGRAMMENI +037B..037D ; L # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; L # L& GREEK CAPITAL LETTER YOT +0386 ; L # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; L # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; L # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; L # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; L # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0482 ; L # So CYRILLIC THOUSANDS SIGN +048A..052F ; L # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; L # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; L # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; L # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; L # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; L # Po ARMENIAN FULL STOP +0903 ; L # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; L # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093B ; L # Mc DEVANAGARI VOWEL SIGN OOE +093D ; L # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; L # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; L # Mc [4] DEVANAGARI VOWEL SIGN CANDRA 
O..DEVANAGARI VOWEL SIGN AU +094E..094F ; L # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; L # Lo DEVANAGARI OM +0958..0961 ; L # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0964..0965 ; L # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; L # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; L # Po DEVANAGARI ABBREVIATION SIGN +0971 ; L # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; L # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0982..0983 ; L # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; L # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; L # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; L # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; L # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; L # Lo BENGALI LETTER LA +09B6..09B9 ; L # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; L # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; L # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C7..09C8 ; L # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; L # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; L # Lo BENGALI LETTER KHANDA TA +09D7 ; L # Mc BENGALI AU LENGTH MARK +09DC..09DD ; L # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; L # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E6..09EF ; L # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; L # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F4..09F9 ; L # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; L # So BENGALI ISSHAR +09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; L # Po BENGALI ABBREVIATION SIGN +0A03 ; L # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; L # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER 
NA +0A2A..0A30 ; L # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; L # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; L # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; L # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; L # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A59..0A5C ; L # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; L # Lo GURMUKHI LETTER FA +0A66..0A6F ; L # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A72..0A74 ; L # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A76 ; L # Po GURMUKHI ABBREVIATION SIGN +0A83 ; L # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; L # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; L # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; L # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; L # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; L # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; L # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; L # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; L # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; L # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; L # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; L # Lo GUJARATI OM +0AE0..0AE1 ; L # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE6..0AEF ; L # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; L # Po GUJARATI ABBREVIATION SIGN +0AF9 ; L # Lo GUJARATI LETTER ZHA +0B02..0B03 ; L # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; L # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; L # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; L # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; L # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; L # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; L # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; L # Lo ORIYA SIGN AVAGRAHA +0B3E ; L # Mc ORIYA VOWEL SIGN AA 
+0B40 ; L # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; L # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; L # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B57 ; L # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; L # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; L # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B66..0B6F ; L # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; L # So ORIYA ISSHAR +0B71 ; L # Lo ORIYA LETTER WA +0B72..0B77 ; L # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B83 ; L # Lo TAMIL SIGN VISARGA +0B85..0B8A ; L # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; L # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; L # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; L # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; L # Lo TAMIL LETTER JA +0B9E..0B9F ; L # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; L # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; L # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; L # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; L # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC1..0BC2 ; L # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; L # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; L # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; L # Lo TAMIL OM +0BD7 ; L # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; L # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; L # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0C01..0C03 ; L # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; L # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; L # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; L # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; L # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; L # Lo TELUGU SIGN AVAGRAHA +0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER 
RRRA +0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; L # Po TELUGU SIGN SIDDHAM +0C7F ; L # So TELUGU SIGN TUUMU +0C80 ; L # Lo KANNADA SIGN SPACING CANDRABINDU +0C82..0C83 ; L # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; L # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; L # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; L # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; L # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; L # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; L # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; L # Lo KANNADA SIGN AVAGRAHA +0CBE ; L # Mc KANNADA VOWEL SIGN AA +0CBF ; L # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; L # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; L # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; L # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D02..0D03 ; L # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; L # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; L # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; L # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; L # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; L # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D46..0D48 ; L # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; 
L # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; L # Lo MALAYALAM LETTER DOT REPH +0D4F ; L # So MALAYALAM SIGN PARA +0D54..0D56 ; L # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; L # Mc MALAYALAM AU LENGTH MARK +0D58..0D5E ; L # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; L # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D66..0D6F ; L # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; L # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; L # So MALAYALAM DATE MARK +0D7A..0D7F ; L # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D82..0D83 ; L # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; L # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; L # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; L # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; L # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; L # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCF..0DD1 ; L # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDF ; L # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; L # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; L # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; L # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30 ; L # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; L # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E40..0E45 ; L # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; L # Lm THAI CHARACTER MAIYAMOK +0E4F ; L # Po THAI CHARACTER FONGMAN +0E50..0E59 ; L # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; L # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82 ; L # Lo [2] LAO 
LETTER KO..LAO LETTER KHO SUNG +0E84 ; L # Lo LAO LETTER KHO TAM +0E86..0E8A ; L # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; L # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; L # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; L # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; L # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; L # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; L # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; L # Lm LAO KO LA +0ED0..0ED9 ; L # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; L # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; L # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; L # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; L # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; L # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; L # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; L # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F1A..0F1F ; L # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; L # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; L # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; L # So TIBETAN MARK BSDUS RTAGS +0F36 ; L # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F38 ; L # So TIBETAN MARK CHE MGO +0F3E..0F3F ; L # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; L # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; L # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F7F ; L # Mc TIBETAN SIGN RNAM BCAD +0F85 ; L # Po TIBETAN MARK PALUTA +0F88..0F8C ; L # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0FBE..0FC5 ; L # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC7..0FCC ; L # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; L # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; L 
# Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8 ; L # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA ; L # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A ; L # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; L # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +1031 ; L # Mc MYANMAR VOWEL SIGN E +1038 ; L # Mc MYANMAR SIGN VISARGA +103B..103C ; L # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103F ; L # Lo MYANMAR LETTER GREAT SA +1040..1049 ; L # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; L # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; L # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; L # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +105A..105D ; L # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; L # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; L # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; L # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; L # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; L # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; L # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1083..1084 ; L # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1087..108C ; L # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108E ; L # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; L # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; L # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 
+10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; L # L& GEORGIAN CAPITAL LETTER YN +10CD ; L # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; L # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR +10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; L # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; L # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; L # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; L # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; L # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; L # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; L # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; L # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; L # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; L # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; L # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; L # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; L # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; L # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; L # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; L # So CANADIAN SYLLABICS CHI SIGN +166E ; L # Po CANADIAN SYLLABICS 
FULL STOP +166F..167F ; L # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; L # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; L # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED ; L # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0 ; L # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; L # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; L # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1715 ; L # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; L # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1734 ; L # Mc HANUNOO SIGN PAMUDPOD +1735..1736 ; L # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751 ; L # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; L # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; L # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; L # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; L # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; L # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; L # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D4..17D6 ; L # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; L # Lm KHMER SIGN LEK TOO +17D8..17DA ; L # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DC ; L # Lo KHMER SIGN AVAKRAHASANYA +17E0..17E9 ; L # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +1810..1819 ; L # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; L # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; L # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; L # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; L # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; L # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; L # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; L # Lo [70] CANADIAN 
SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; L # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1923..1926 ; L # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; L # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; L # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; L # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1946..194F ; L # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; L # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; L # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; L # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; L # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; L # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; L # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; L # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A19..1A1A ; L # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1E..1A1F ; L # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54 ; L # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; L # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; L # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A61 ; L # Mc TAI THAM VOWEL SIGN A +1A63..1A64 ; L # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A6D..1A72 ; L # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A80..1A89 ; L # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; L # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; L # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; L # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; L # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B04 ; L # Mc BALINESE SIGN BISAH +1B05..1B33 ; L # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B35 ; L # Mc BALINESE VOWEL SIGN TEDUNG +1B3B ; L # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; 
L # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; L # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; L # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B4E..1B4F ; L # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN +1B50..1B59 ; L # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; L # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; L # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B74..1B7C ; L # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7F ; L # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK +1B82 ; L # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; L # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; L # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; L # Mc SUNDANESE SIGN PAMAAEH +1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; L # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; L # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; L # Mc BATAK VOWEL SIGN U +1BF2..1BF3 ; L # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF ; L # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23 ; L # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; L # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; L # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C3B..1C3F ; L # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; L # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; L # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; L # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; L # Lo [30] 
OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C8A ; L # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD3 ; L # Po VEDIC SIGN NIHSHVASA +1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; L # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; L # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; L # Mc VEDIC SIGN ATIKRAMA +1CFA ; L # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; L # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; L # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; L # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; L # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; L # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; L # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; L # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; L # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; L # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; L # L& [8] GREEK 
SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; L # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; L # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; L # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; L # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; L # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; L # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; L # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; L # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; L # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; L # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; L # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200E ; L # Cf LEFT-TO-RIGHT MARK +2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; L # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; L # L& DOUBLE-STRUCK CAPITAL C +2107 ; L # L& EULER CONSTANT +210A..2113 ; L # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; L # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; L # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; L # L& DOUBLE-STRUCK CAPITAL Z +2126 ; L # L& OHM SIGN 
+2128 ; L # L& BLACK-LETTER CAPITAL Z +212A..212D ; L # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; L # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; L # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; L # L& INFORMATION SOURCE +213C..213F ; L # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; L # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; L # L& TURNED SMALL F +214F ; L # So SYMBOL FOR SAMARITAN SOURCE +2160..2182 ; L # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; L # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; L # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2336..237A ; L # So [69] APL FUNCTIONAL SYMBOL I-BEAM..APL FUNCTIONAL SYMBOL ALPHA +2395 ; L # So APL FUNCTIONAL SYMBOL QUAD +249C..24E9 ; L # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +26AC ; L # So MEDIUM SMALL WHITE CIRCLE +2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2C00..2C7B ; L # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; L # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; L # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; L # L& GEORGIAN SMALL LETTER YN +2D2D ; L # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; L # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; L # Po TIFINAGH SEPARATOR MARK +2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; L # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO 
+2DA8..2DAE ; L # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; L # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; L # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; L # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; L # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; L # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; L # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; L # Lm IDEOGRAPHIC ITERATION MARK +3006 ; L # Lo IDEOGRAPHIC CLOSING MARK +3007 ; L # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; L # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302E..302F ; L # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; L # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; L # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; L # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; L # Lo MASU MARK +3041..3096 ; L # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; L # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; L # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; L # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; L # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; L # Lo KATAKANA DIGRAPH KOTO +3105..312F ; L # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; L # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; L # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; L # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; L # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; L # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321C ; L # So [29] PARENTHESIZED HANGUL 
KIYEOK..PARENTHESIZED HANGUL CIEUC U +3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; L # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; L # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A +327F ; L # So KOREAN STANDARD SYMBOL +3280..3289 ; L # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; L # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32C0..32CB ; L # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +32D0..3376 ; L # So [167] CIRCLED KATAKANA A..SQUARE PC +337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB +33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +3400..4DBF ; L # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; L # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; L # Lm YI SYLLABLE WU +A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; L # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; L # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; L # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; L # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; L # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; L # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; L # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; L # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; L # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; L # Lo CYRILLIC LETTER MULTIOCULAR O +A680..A69B ; L # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; L # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; L # Lo [70] BAMUM 
LETTER A..BAMUM LETTER KI +A6E6..A6EF ; L # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F2..A6F7 ; L # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A722..A76F ; L # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; L # Lm MODIFIER LETTER US +A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; L # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; L # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; L # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; L # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; L # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; L # Mc SYLOTI NAGRI VOWEL SIGN OO +A830..A835 ; L # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; L # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A840..A873 ; L # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; L # 
Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; L # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; L # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8CE..A8CF ; L # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; L # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8F2..A8F7 ; L # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; L # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; L # Lo DEVANAGARI HEADSTROKE +A8FC ; L # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; L # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A900..A909 ; L # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; L # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A92E..A92F ; L # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; L # Lo [23] REJANG LETTER KA..REJANG LETTER A +A952..A953 ; L # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; L # Po REJANG SECTION MARK +A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A983 ; L # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; L # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; L # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; L # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9C0 ; L # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; L # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; L # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; L # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; L # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; L # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; L # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; L # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; L # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT 
NINE +A9FA..A9FE ; L # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; L # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA2F..AA30 ; L # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; L # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA40..AA42 ; L # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; L # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4D ; L # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; L # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; L # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; L # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; L # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; L # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; L # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; L # Lo MYANMAR LETTER AITON RA +AA7B ; L # Mc MYANMAR SIGN PAO KAREN TONE +AA7D ; L # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; L # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; L # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; L # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; L # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; L # Lo TAI VIET TONE MAI NUENG +AAC2 ; L # Lo TAI VIET TONE MAI SONG +AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; L # Lm TAI VIET SYMBOL SAM +AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; L # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; L # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; L # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; L # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; L # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; L # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; L # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; L # Lo [6] ETHIOPIC 
SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; L # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; L # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; L # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; L # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; L # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; L # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; L # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; L # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; L # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; L # Po MEETEI MAYEK CHEIKHEI +ABEC ; L # Mc MEETEI MAYEK LUM IYEK +ABF0..ABF9 ; L # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +E000..F8FF ; L # Co [6400] .. 
+F900..FA6D ; L # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; L # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; L # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; L # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; L # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; L # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; L # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; L # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; L # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; L # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; L # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; L # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; L # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; L # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; L # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; L # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; L # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; L # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100 ; L # Po AEGEAN WORD SEPARATOR LINE +10102 ; L # Po AEGEAN CHECK MARK +10107..10133 ; L # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 
+10137..1013F ; L # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +1018D..1018E ; L # So [2] GREEK INDICTION SIGN..NOMISMA SIGN +101D0..101FC ; L # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +10280..1029C ; L # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; L # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; L # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; L # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..10340 ; L # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; L # Nl GOTHIC LETTER NINETY +10342..10349 ; L # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; L # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; L # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; L # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; L # Po UGARITIC WORD DIVIDER +103A0..103C3 ; L # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; L # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; L # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; L # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; L # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; L # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; L # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; L # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; L # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; L # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; L # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; L # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; L # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; L # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; L # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 
+10594..10595 ; L # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; L # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; L # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; L # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; L # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; L # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +11000 ; L # Mc BRAHMI SIGN CANDRABINDU +11002 ; L # Mc BRAHMI SIGN VISARGA +11003..11037 ; L # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11047..1104D ; L # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11066..1106F ; L # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11071..11072 ; L # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; L # Lo BRAHMI LETTER OLD TAMIL LLA +11082 ; L # Mc KAITHI SIGN VISARGA +11083..110AF ; L # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; L # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; L # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; L # Cf KAITHI NUMBER SIGN +110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110CD ; L # Cf KAITHI NUMBER SIGN ABOVE +110D0..110E8 ; L # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; L # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 
+11103..11126 ; L # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; L # Mc CHAKMA VOWEL SIGN E +11136..1113F ; L # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; L # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; L # Lo CHAKMA LETTER LHAA +11145..11146 ; L # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; L # Lo CHAKMA LETTER VAA +11150..11172 ; L # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11174..11175 ; L # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; L # Lo MAHAJANI LIGATURE SHRI +11182 ; L # Mc SHARADA SIGN VISARGA +11183..111B2 ; L # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; L # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; L # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; L # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; L # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111CD ; L # Po SHARADA SUTRA MARK +111CE ; L # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111D0..111D9 ; L # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; L # Lo SHARADA EKAM +111DB ; L # Po SHARADA SIGN SIDDHAM +111DC ; L # Lo SHARADA HEADSTROKE +111DD..111DF ; L # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; L # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; L # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; L # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; L # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; L # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; L # Mc KHOJKI SIGN VIRAMA +11238..1123D ; L # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123F..11240 ; L # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11280..11286 ; L # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; L # Lo MULTANI LETTER GHA +1128A..1128D ; L # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; L # Lo [15] MULTANI LETTER NYA..MULTANI 
LETTER BA +1129F..112A8 ; L # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; L # Po MULTANI SECTION MARK +112B0..112DE ; L # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112E0..112E2 ; L # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112F0..112F9 ; L # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11302..11303 ; L # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; L # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; L # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; L # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; L # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; L # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; L # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; L # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; L # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11341..11344 ; L # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; L # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; L # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; L # Lo GRANTHA OM +11357 ; L # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; L # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; L # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; L # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; L # Lo TULU-TIGALARI LETTER EE +1138E ; L # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; L # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; L # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; L # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; L # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; L # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; L # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; L # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CF ; L # Mc 
TULU-TIGALARI SIGN LOOPED VIRAMA +113D1 ; L # Lo TULU-TIGALARI REPHA +113D3 ; L # Lo TULU-TIGALARI SIGN PLUTA +113D4..113D5 ; L # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; L # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +11400..11434 ; L # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; L # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; L # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; L # Mc NEWA SIGN VISARGA +11447..1144A ; L # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; L # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; L # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; L # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; L # Po NEWA INSERTION SIGN +1145F..11461 ; L # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; L # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; L # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B9 ; L # Mc TIRHUTA VOWEL SIGN E +114BB..114BE ; L # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114C1 ; L # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; L # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; L # Po TIRHUTA ABBREVIATION SIGN +114C7 ; L # Lo TIRHUTA OM +114D0..114D9 ; L # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; L # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; L # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B8..115BB ; L # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; L # Mc SIDDHAM SIGN VISARGA +115C1..115D7 ; L # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; L # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; L # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; L # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; L # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; L # Mc MODI 
SIGN VISARGA +11641..11643 ; L # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; L # Lo MODI SIGN HUVA +11650..11659 ; L # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; L # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; L # Mc TAKRI SIGN VISARGA +116AE..116AF ; L # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; L # Mc TAKRI SIGN VIRAMA +116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; L # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; L # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171E ; L # Mc AHOM CONSONANT SIGN MEDIAL RA +11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11726 ; L # Mc AHOM VOWEL SIGN E +11730..11739 ; L # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; L # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; L # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; L # So AHOM SYMBOL VI +11740..11746 ; L # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; L # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; L # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; L # Mc DOGRA SIGN VISARGA +1183B ; L # Po DOGRA ABBREVIATION SIGN +118A0..118DF ; L # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; L # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; L # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF..11906 ; L # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; L # Lo DIVES AKURU LETTER O +1190C..11913 ; L # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; L # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; L # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; L # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; L # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES 
AKURU VOWEL SIGN O +1193D ; L # Mc DIVES AKURU SIGN HALANTA +1193F ; L # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; L # Mc DIVES AKURU MEDIAL YA +11941 ; L # Lo DIVES AKURU INITIAL RA +11942 ; L # Mc DIVES AKURU MEDIAL RA +11944..11946 ; L # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; L # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; L # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; L # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; L # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; L # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; L # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; L # Po NANDINAGARI SIGN SIDDHAM +119E3 ; L # Lo NANDINAGARI HEADSTROKE +119E4 ; L # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; L # Lo ZANABAZAR SQUARE LETTER A +11A07..11A08 ; L # Mn [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A0B..11A32 ; L # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A39 ; L # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; L # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3F..11A46 ; L # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A50 ; L # Lo SOYOMBO LETTER A +11A57..11A58 ; L # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A5C..11A89 ; L # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A97 ; L # Mc SOYOMBO SIGN VISARGA +11A9A..11A9C ; L # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; L # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; L # Po SUNUWAR SIGN 
PVO +11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; L # Mc BHAIKSUKI SIGN VISARGA +11C3F ; L # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; L # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; L # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; L # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; L # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; L # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; L # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11CA9 ; L # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; L # Mc MARCHEN VOWEL SIGN I +11CB4 ; L # Mc MARCHEN VOWEL SIGN O +11D00..11D06 ; L # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; L # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; L # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; L # Lo MASARAM GONDI REPHA +11D50..11D59 ; L # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; L # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; L # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; L # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; L # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; L # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; L # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; L # Lo GUNJALA GONDI OM +11DA0..11DA9 ; L # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F02 ; L # Lo KAWI SIGN REPHA +11F03 ; L # Mc KAWI SIGN VISARGA 
+11F04..11F10 ; L # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; L # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; L # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; L # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F41 ; L # Mc KAWI SIGN KILLER +11F43..11F4F ; L # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11F50..11F59 ; L # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11FB0 ; L # Lo LISU LETTER YHA +11FC0..11FD4 ; L # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FFF ; L # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; L # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; L # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; L # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; L # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; L # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; L # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342F ; L # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13430..1343F ; L # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13441..13446 ; L # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; L # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; L # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1612A..1612C ; L # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16130..16139 ; L # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE +16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; L # Lo [31] MRO 
LETTER TA..MRO LETTER TEK +16A60..16A69 ; L # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; L # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; L # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; L # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; L # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF5 ; L # Po BASSA VAH FULL STOP +16B00..16B2F ; L # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B37..16B3B ; L # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; L # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; L # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; L # Po PAHAWH HMONG SIGN XAUS +16B45 ; L # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; L # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; L # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; L # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; L # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; L # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; L # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; L # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D6D..16D6F ; L # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA +16D70..16D79 ; L # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; L # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; L # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; L # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F93..16F9F ; L # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; L # Lm 
[2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; L # Lm OLD CHINESE ITERATION MARK +16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; L # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; L # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; L # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; L # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; L # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; L # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; L # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; L # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; L # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; L # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; L # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; L # So DUPLOYAN SIGN O WITH CROSS +1BC9F ; L # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1CCD6..1CCEF ; L # So [26] OUTLINED LATIN CAPITAL LETTER A..OUTLINED LATIN CAPITAL LETTER Z +1CF50..1CFC3 ; L # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; L # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL 
SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; L # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; L # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; L # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D183..1D184 ; L # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; L # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1E8 ; L # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN +1D2C0..1D2D3 ; L # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN +1D2E0..1D2F3 ; L # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D360..1D378 ; L # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; L # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; L # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; L # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; L # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; L # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; L # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; L # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; L # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; L # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; L # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; L # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; L # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; L # L& [4] 
MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; L # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; L # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; L # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; L # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; L # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; L # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; L # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; L # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; L # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; L # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; L # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; L # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D800..1D9FF ; L # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA37..1DA3A ; L # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA6D..1DA74 ; L # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA76..1DA83 ; L # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA85..1DA86 ; L # So [2] SIGNWRITING LOCATION 
TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; L # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; L # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; L # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; L # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; L # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; L # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2F0..1E2F9 ; L # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4D0..1E4EA ; L # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; L # Lm NAG MUNDARI SIGN OJOD +1E4F0..1E4F9 ; L # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; L # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; L # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; L # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E5FF ; L # Po OL ONAL ABBREVIATION SIGN +1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; L # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1F110..1F12E ; L # So [31] PARENTHESIZED LATIN 
CAPITAL LETTER A..CIRCLED WZ +1F130..1F169 ; L # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F1AC ; L # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD +1F1E6..1F202 ; L # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA +1F210..1F23B ; L # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +20000..2A6DF ; L # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; L # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; L # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; L # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; L # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; L # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +F0000..FFFFD ; L # Co [65534] .. +100000..10FFFD; L # Co [65534] .. + +# The above property value applies to 815351 code points not listed here. 
+# Total code points: 1095513 + +# ================================================ + +# Bidi_Class=Right_To_Left + +05BE ; R # Pd HEBREW PUNCTUATION MAQAF +05C0 ; R # Po HEBREW PUNCTUATION PASEQ +05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ +05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA +05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; R # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; R # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +07C0..07C9 ; R # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; R # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; R # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; R # Lm NKO LAJANYALAN +07FE..07FF ; R # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815 ; R # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; R # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; R # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; R # Lm SAMARITAN MODIFIER LETTER I +0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +085E ; R # Po MANDAIC PUNCTUATION +200F ; R # Cf RIGHT-TO-LEFT MARK +FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; R # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; R # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; R # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; R # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED +10800..10805 ; R # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; R # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; R # Lo [44] CYPRIOT 
SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; R # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; R # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; R # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10857 ; R # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; R # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; R # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; R # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; R # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; R # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108A7..108AF ; R # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108E0..108F2 ; R # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; R # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF ; R # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; R # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; R # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; R # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; R # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; R # Lo KHAROSHTHI LETTER A +10A10..10A13 ; R # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; R # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; R # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A40..10A48 ; R # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE 
HALF +10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A60..10A7C ; R # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; R # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; R # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C ; R # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; R # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AC0..10AC7 ; R # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; R # So MANICHAEAN SIGN UD +10AC9..10AE4 ; R # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AEB..10AEF ; R # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; R # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B00..10B35 ; R # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; R # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; R # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; R # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; R # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; R # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C ; R # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF ; R # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D4A..10D4D ; R # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN 
EE +10D4E ; R # Lm GARAY VOWEL LENGTH MARK +10D4F ; R # Lo GARAY SUKUN +10D50..10D65 ; R # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; R # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; R # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; R # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN +10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAD ; R # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F70..10F81 ; R # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F86..10F89 ; R # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10FB0..10FC4 ; R # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; R # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FE0..10FF6 ; R # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +1E800..1E8C4 ; R # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF ; R # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E900..1E943 ; R # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; R # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK + +# The above property value applies to 2087 code points not listed here. 
+# Total code points: 3631 + +# ================================================ + +# Bidi_Class=European_Number + +0030..0039 ; EN # Nd [10] DIGIT ZERO..DIGIT NINE +00B2..00B3 ; EN # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B9 ; EN # No SUPERSCRIPT ONE +06F0..06F9 ; EN # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +2070 ; EN # No SUPERSCRIPT ZERO +2074..2079 ; EN # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +2080..2089 ; EN # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +2488..249B ; EN # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP +FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +102E1..102FB ; EN # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +1CCF0..1CCF9 ; EN # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1D7CE..1D7FF ; EN # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1F100..1F10A ; EN # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1FBF0..1FBF9 ; EN # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE + +# Total code points: 178 + +# ================================================ + +# Bidi_Class=European_Separator + +002B ; ES # Sm PLUS SIGN +002D ; ES # Pd HYPHEN-MINUS +207A..207B ; ES # Sm [2] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT MINUS +208A..208B ; ES # Sm [2] SUBSCRIPT PLUS SIGN..SUBSCRIPT MINUS +2212 ; ES # Sm MINUS SIGN +FB29 ; ES # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FE62 ; ES # Sm SMALL PLUS SIGN +FE63 ; ES # Pd SMALL HYPHEN-MINUS +FF0B ; ES # Sm FULLWIDTH PLUS SIGN +FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 12 + +# ================================================ + +# Bidi_Class=European_Terminator + +0023 ; ET # Po NUMBER SIGN +0024 ; ET # Sc DOLLAR SIGN +0025 ; ET # Po PERCENT SIGN +00A2..00A5 ; ET # Sc [4] CENT SIGN..YEN SIGN +00B0 ; ET # So DEGREE SIGN +00B1 ; ET # Sm PLUS-MINUS SIGN +058F ; ET # Sc ARMENIAN DRAM SIGN +0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 
+066A ; ET # Po ARABIC PERCENT SIGN +09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09FB ; ET # Sc BENGALI GANDA MARK +0AF1 ; ET # Sc GUJARATI RUPEE SIGN +0BF9 ; ET # Sc TAMIL RUPEE SIGN +0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT +17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL +2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME +20A0..20C0 ; ET # Sc [33] EURO-CURRENCY SIGN..SOM SIGN +212E ; ET # So ESTIMATED SYMBOL +2213 ; ET # Sm MINUS-OR-PLUS SIGN +A838 ; ET # Sc NORTH INDIC RUPEE MARK +A839 ; ET # So NORTH INDIC QUANTITY MARK +FE5F ; ET # Po SMALL NUMBER SIGN +FE69 ; ET # Sc SMALL DOLLAR SIGN +FE6A ; ET # Po SMALL PERCENT SIGN +FF03 ; ET # Po FULLWIDTH NUMBER SIGN +FF04 ; ET # Sc FULLWIDTH DOLLAR SIGN +FF05 ; ET # Po FULLWIDTH PERCENT SIGN +FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +11FDD..11FE0 ; ET # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +1E2FF ; ET # Sc WANCHO NGUN SIGN + +# The above property value applies to 15 code points not listed here. 
+# Total code points: 92 + +# ================================================ + +# Bidi_Class=Arabic_Number + +0600..0605 ; AN # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR +06DD ; AN # Cf ARABIC END OF AYAH +0890..0891 ; AN # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; AN # Cf ARABIC DISPUTED END OF AYAH +10D30..10D39 ; AN # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; AN # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS + +# Total code points: 73 + +# ================================================ + +# Bidi_Class=Common_Separator + +002C ; CS # Po COMMA +002E..002F ; CS # Po [2] FULL STOP..SOLIDUS +003A ; CS # Po COLON +00A0 ; CS # Zs NO-BREAK SPACE +060C ; CS # Po ARABIC COMMA +202F ; CS # Zs NARROW NO-BREAK SPACE +2044 ; CS # Sm FRACTION SLASH +FE50 ; CS # Po SMALL COMMA +FE52 ; CS # Po SMALL FULL STOP +FE55 ; CS # Po SMALL COLON +FF0C ; CS # Po FULLWIDTH COMMA +FF0E..FF0F ; CS # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF1A ; CS # Po FULLWIDTH COLON + +# Total code points: 15 + +# ================================================ + +# Bidi_Class=Paragraph_Separator + +000A ; B # Cc +000D ; B # Cc +001C..001E ; B # Cc [3] .. 
+0085 ; B # Cc +2029 ; B # Zp PARAGRAPH SEPARATOR + +# Total code points: 7 + +# ================================================ + +# Bidi_Class=Segment_Separator + +0009 ; S # Cc +000B ; S # Cc +001F ; S # Cc + +# Total code points: 3 + +# ================================================ + +# Bidi_Class=White_Space + +000C ; WS # Cc +0020 ; WS # Zs SPACE +1680 ; WS # Zs OGHAM SPACE MARK +2000..200A ; WS # Zs [11] EN QUAD..HAIR SPACE +2028 ; WS # Zl LINE SEPARATOR +205F ; WS # Zs MEDIUM MATHEMATICAL SPACE +3000 ; WS # Zs IDEOGRAPHIC SPACE + +# Total code points: 17 + +# ================================================ + +# Bidi_Class=Other_Neutral + +0021..0022 ; ON # Po [2] EXCLAMATION MARK..QUOTATION MARK +0026..0027 ; ON # Po [2] AMPERSAND..APOSTROPHE +0028 ; ON # Ps LEFT PARENTHESIS +0029 ; ON # Pe RIGHT PARENTHESIS +002A ; ON # Po ASTERISK +003B ; ON # Po SEMICOLON +003C..003E ; ON # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; ON # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; ON # Ps LEFT SQUARE BRACKET +005C ; ON # Po REVERSE SOLIDUS +005D ; ON # Pe RIGHT SQUARE BRACKET +005E ; ON # Sk CIRCUMFLEX ACCENT +005F ; ON # Pc LOW LINE +0060 ; ON # Sk GRAVE ACCENT +007B ; ON # Ps LEFT CURLY BRACKET +007C ; ON # Sm VERTICAL LINE +007D ; ON # Pe RIGHT CURLY BRACKET +007E ; ON # Sm TILDE +00A1 ; ON # Po INVERTED EXCLAMATION MARK +00A6 ; ON # So BROKEN BAR +00A7 ; ON # Po SECTION SIGN +00A8 ; ON # Sk DIAERESIS +00A9 ; ON # So COPYRIGHT SIGN +00AB ; ON # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; ON # Sm NOT SIGN +00AE ; ON # So REGISTERED SIGN +00AF ; ON # Sk MACRON +00B4 ; ON # Sk ACUTE ACCENT +00B6..00B7 ; ON # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; ON # Sk CEDILLA +00BB ; ON # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; ON # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; ON # Po INVERTED QUESTION MARK +00D7 ; ON # Sm MULTIPLICATION SIGN +00F7 ; ON # Sm DIVISION SIGN +02B9..02BA ; ON # Lm [2] MODIFIER 
LETTER PRIME..MODIFIER LETTER DOUBLE PRIME +02C2..02C5 ; ON # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02CF ; ON # Lm [10] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER LOW ACUTE ACCENT +02D2..02DF ; ON # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02EB ; ON # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; ON # Lm MODIFIER LETTER VOICING +02ED ; ON # Sk MODIFIER LETTER UNASPIRATED +02EF..02FF ; ON # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0374 ; ON # Lm GREEK NUMERAL SIGN +0375 ; ON # Sk GREEK LOWER NUMERAL SIGN +037E ; ON # Po GREEK QUESTION MARK +0384..0385 ; ON # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0387 ; ON # Po GREEK ANO TELEIA +03F6 ; ON # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +058A ; ON # Pd ARMENIAN HYPHEN +058D..058E ; ON # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +0606..0607 ; ON # Sm [2] ARABIC-INDIC CUBE ROOT..ARABIC-INDIC FOURTH ROOT +060E..060F ; ON # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +06DE ; ON # So ARABIC START OF RUB EL HIZB +06E9 ; ON # So ARABIC PLACE OF SAJDAH +07F6 ; ON # So NKO SYMBOL OO DENNEN +07F7..07F9 ; ON # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +0BF3..0BF8 ; ON # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BFA ; ON # So TAMIL NUMBER SIGN +0C78..0C7E ; ON # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0F3A ; ON # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; ON # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; ON # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; ON # Pe TIBETAN MARK ANG KHANG GYAS +1390..1399 ; ON # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +1400 ; ON # Pd CANADIAN SYLLABICS HYPHEN +169B ; ON # Ps OGHAM FEATHER MARK +169C ; ON # Pe OGHAM REVERSED FEATHER MARK +17F0..17F9 ; ON # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL 
LEK ATTAK PRAM-BUON +1800..1805 ; ON # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806 ; ON # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; ON # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +1940 ; ON # So LIMBU SIGN LOO +1944..1945 ; ON # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +19DE..19FF ; ON # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC +1FBD ; ON # Sk GREEK KORONIS +1FBF..1FC1 ; ON # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; ON # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; ON # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; ON # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; ON # Sk [2] GREEK OXIA..GREEK DASIA +2010..2015 ; ON # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; ON # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; ON # Pi LEFT SINGLE QUOTATION MARK +2019 ; ON # Pf RIGHT SINGLE QUOTATION MARK +201A ; ON # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; ON # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; ON # Pf RIGHT DOUBLE QUOTATION MARK +201E ; ON # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; ON # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; ON # Po [8] DAGGER..HYPHENATION POINT +2035..2038 ; ON # Po [4] REVERSED PRIME..CARET +2039 ; ON # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; ON # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; ON # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; ON # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; ON # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2045 ; ON # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; ON # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; ON # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; ON # Sm COMMERCIAL MINUS SIGN +2053 ; ON # Po SWUNG DASH +2054 ; ON # Pc INVERTED UNDERTIE +2055..205E ; ON # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +207C ; ON # Sm SUPERSCRIPT EQUALS 
SIGN +207D ; ON # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; ON # Pe SUPERSCRIPT RIGHT PARENTHESIS +208C ; ON # Sm SUBSCRIPT EQUALS SIGN +208D ; ON # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; ON # Pe SUBSCRIPT RIGHT PARENTHESIS +2100..2101 ; ON # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA +2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT +2114 ; ON # So L B BAR SYMBOL +2116..2117 ; ON # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +2118 ; ON # Sm SCRIPT CAPITAL P +211E..2123 ; ON # So [6] PRESCRIPTION TAKE..VERSICLE +2125 ; ON # So OUNCE SIGN +2127 ; ON # So INVERTED OHM SIGN +2129 ; ON # So TURNED GREEK SMALL LETTER IOTA +213A..213B ; ON # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +2140..2144 ; ON # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +214A ; ON # So PROPERTY LINE +214B ; ON # Sm TURNED AMPERSAND +214C..214D ; ON # So [2] PER SIGN..AKTIESELSKAB +2150..215F ; ON # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; ON # No VULGAR FRACTION ZERO THIRDS +218A..218B ; ON # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; ON # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; ON # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; ON # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; ON # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; ON # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; ON # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; ON # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; ON # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; ON # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; ON # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; ON # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; ON # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; ON # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; 
ON # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; ON # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; ON # So DOWNWARDS DOUBLE ARROW +21D4 ; ON # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; ON # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..2211 ; ON # Sm [30] RIGHT ARROW WITH SMALL CIRCLE..N-ARY SUMMATION +2214..22FF ; ON # Sm [236] DOT PLUS..Z NOTATION BAG MEMBERSHIP +2300..2307 ; ON # So [8] DIAMETER SIGN..WAVY LINE +2308 ; ON # Ps LEFT CEILING +2309 ; ON # Pe RIGHT CEILING +230A ; ON # Ps LEFT FLOOR +230B ; ON # Pe RIGHT FLOOR +230C..231F ; ON # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; ON # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; ON # So [7] FROWN..KEYBOARD +2329 ; ON # Ps LEFT-POINTING ANGLE BRACKET +232A ; ON # Pe RIGHT-POINTING ANGLE BRACKET +232B..2335 ; ON # So [11] ERASE TO THE LEFT..COUNTERSINK +237B ; ON # So NOT CHECK MARK +237C ; ON # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..2394 ; ON # So [24] SHOULDERED OPEN BOX..SOFTWARE-FUNCTION SYMBOL +2396..239A ; ON # So [5] DECIMAL SEPARATOR KEY SYMBOL..CLEAR SCREEN SYMBOL +239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2429 ; ON # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM +2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY +24EA..24FF ; ON # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; ON # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; ON # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; ON # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; ON # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; ON # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; ON # Sm [8] UPPER 
LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; ON # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; ON # Sm MUSIC SHARP SIGN +2670..26AB ; ON # So [60] WEST SYRIAC CROSS..MEDIUM BLACK CIRCLE +26AD..2767 ; ON # So [187] MARRIAGE SYMBOL..ROTATED FLORAL HEART BULLET +2768 ; ON # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; ON # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; ON # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; ON # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; ON # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; ON # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; ON # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; ON # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; ON # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; ON # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; ON # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; ON # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; ON # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; ON # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; ON # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794..27BF ; ON # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; ON # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; ON # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; ON # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; ON # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; ON # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; ON # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; ON # Ps MATHEMATICAL LEFT 
FLATTENED PARENTHESIS +27EF ; ON # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; ON # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; ON # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; ON # Ps LEFT WHITE CURLY BRACKET +2984 ; ON # Pe RIGHT WHITE CURLY BRACKET +2985 ; ON # Ps LEFT WHITE PARENTHESIS +2986 ; ON # Pe RIGHT WHITE PARENTHESIS +2987 ; ON # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; ON # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; ON # Ps Z NOTATION LEFT BINDING BRACKET +298A ; ON # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; ON # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; ON # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; ON # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; ON # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; ON # Ps LEFT ARC LESS-THAN BRACKET +2994 ; ON # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; ON # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; ON # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; ON # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; ON # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; ON # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; ON # Ps LEFT WIGGLY FENCE +29D9 ; ON # Pe RIGHT WIGGLY FENCE +29DA ; ON # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; ON # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; ON # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; ON # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; ON # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; ON # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; ON # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; ON # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE 
ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; ON # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; ON # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; ON # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; ON # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2E00..2E01 ; ON # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; ON # Pi LEFT SUBSTITUTION BRACKET +2E03 ; ON # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; ON # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; ON # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; ON # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; ON # Pi LEFT TRANSPOSITION BRACKET +2E0A ; ON # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; ON # Po RAISED SQUARE +2E0C ; ON # Pi LEFT RAISED OMISSION BRACKET +2E0D ; ON # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; ON # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; ON # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; ON # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; ON # Pd HYPHEN WITH DIAERESIS +2E1B ; ON # Po TILDE WITH RING ABOVE +2E1C ; ON # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; ON # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; ON # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; ON # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; ON # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; ON # Ps TOP LEFT HALF BRACKET +2E23 ; ON # Pe TOP RIGHT HALF BRACKET +2E24 ; ON # Ps BOTTOM LEFT HALF BRACKET +2E25 ; ON # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; ON # Ps LEFT SIDEWAYS U BRACKET +2E27 ; ON # Pe RIGHT 
SIDEWAYS U BRACKET +2E28 ; ON # Ps LEFT DOUBLE PARENTHESIS +2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; ON # Lm VERTICAL TILDE +2E30..2E39 ; ON # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; ON # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; ON # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; ON # Pd DOUBLE HYPHEN +2E41 ; ON # Po REVERSED COMMA +2E42 ; ON # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; ON # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; ON # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; ON # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; ON # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; ON # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; ON # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; ON # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; ON # Ps TOP HALF LEFT PARENTHESIS +2E5A ; ON # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; ON # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; ON # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; ON # Pd OBLIQUE HYPHEN +2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFF ; ON # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION +3001..3003 ; ON # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; ON # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3008 ; ON # Ps LEFT ANGLE BRACKET +3009 ; ON # Pe RIGHT ANGLE BRACKET +300A ; ON # Ps LEFT DOUBLE ANGLE BRACKET +300B ; ON # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; ON # Ps LEFT CORNER BRACKET +300D ; ON # Pe RIGHT CORNER BRACKET +300E ; ON # Ps LEFT WHITE CORNER BRACKET +300F ; ON # Pe RIGHT WHITE CORNER BRACKET +3010 ; ON # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; ON # Pe RIGHT BLACK LENTICULAR BRACKET 
+3012..3013 ; ON # So [2] POSTAL MARK..GETA MARK +3014 ; ON # Ps LEFT TORTOISE SHELL BRACKET +3015 ; ON # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; ON # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; ON # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; ON # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; ON # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; ON # Ps LEFT WHITE SQUARE BRACKET +301B ; ON # Pe RIGHT WHITE SQUARE BRACKET +301C ; ON # Pd WAVE DASH +301D ; ON # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; ON # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; ON # So POSTAL MARK FACE +3030 ; ON # Pd WAVY DASH +3036..3037 ; ON # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303D ; ON # Po PART ALTERNATION MARK +303E..303F ; ON # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FB ; ON # Po KATAKANA MIDDLE DOT +31C0..31E5 ; ON # So [38] CJK STROKE T..CJK STROKE SZP +31EF ; ON # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION +321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU +3250 ; ON # So PARTNERSHIP SIGN +3251..325F ; ON # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +327C..327E ; ON # So [3] CIRCLED KOREAN CHARACTER CHAMKO..CIRCLED HANGUL IEUNG U +32B1..32BF ; ON # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32CC..32CF ; ON # So [4] SQUARE HG..LIMITED LIABILITY SIGN +3377..337A ; ON # So [4] SQUARE DM..SQUARE IU +33DE..33DF ; ON # So [2] SQUARE V OVER M..SQUARE A OVER M +33FF ; ON # So SQUARE GAL +4DC0..4DFF ; ON # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A490..A4C6 ; ON # So [55] YI RADICAL QOT..YI RADICAL KE +A60D..A60F ; ON # Po [3] VAI COMMA..VAI QUESTION MARK +A673 ; ON # Po SLAVONIC ASTERISK +A67E ; ON # Po CYRILLIC 
KAVYKA +A67F ; ON # Lm CYRILLIC PAYEROK +A700..A716 ; ON # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; ON # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; ON # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FD3E ; ON # Pe ORNATE LEFT PARENTHESIS +FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; ON # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; ON # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; ON # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; ON # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET 
+FE3B ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; ON # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; ON # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; ON # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE51 ; ON # Po SMALL IDEOGRAPHIC COMMA +FE54 ; ON # Po SMALL SEMICOLON +FE56..FE57 ; ON # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FE58 ; ON # Pd SMALL EM DASH +FE59 ; ON # Ps SMALL LEFT PARENTHESIS +FE5A ; ON # Pe SMALL RIGHT PARENTHESIS +FE5B ; ON # Ps SMALL LEFT CURLY BRACKET +FE5C ; ON # Pe SMALL RIGHT CURLY BRACKET +FE5D ; ON # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; ON # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE60..FE61 ; ON # Po [2] SMALL AMPERSAND..SMALL ASTERISK +FE64..FE66 ; ON # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; ON # Po SMALL REVERSE SOLIDUS +FE6B ; ON # Po SMALL COMMERCIAL AT +FF01..FF02 ; ON # Po [2] FULLWIDTH EXCLAMATION MARK..FULLWIDTH QUOTATION MARK +FF06..FF07 ; ON # Po [2] FULLWIDTH AMPERSAND..FULLWIDTH APOSTROPHE +FF08 ; ON # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; ON # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; ON # Po FULLWIDTH ASTERISK +FF1B ; ON # Po FULLWIDTH SEMICOLON +FF1C..FF1E ; ON # Sm [3] 
FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; ON # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3B ; ON # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; ON # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; ON # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; ON # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; ON # Pc FULLWIDTH LOW LINE +FF40 ; ON # Sk FULLWIDTH GRAVE ACCENT +FF5B ; ON # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; ON # Sm FULLWIDTH VERTICAL LINE +FF5D ; ON # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; ON # Sm FULLWIDTH TILDE +FF5F ; ON # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; ON # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; ON # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; ON # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; ON # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; ON # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FFE2 ; ON # Sm FULLWIDTH NOT SIGN +FFE3 ; ON # Sk FULLWIDTH MACRON +FFE4 ; ON # So FULLWIDTH BROKEN BAR +FFE8 ; ON # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; ON # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; ON # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; ON # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10101 ; ON # Po AEGEAN WORD SEPARATOR DOT +10140..10174 ; ON # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; ON # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; ON # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; ON # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C ; ON # So GREEK SINUSOID SIGN +10190..1019C ; ON # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; ON # So GREEK SYMBOL TAU RHO +1091F ; ON # Po PHOENICIAN WORD SEPARATOR +10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10D6E ; ON # Pd 
GARAY HYPHEN +11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +16FE2 ; ON # Po OLD CHINESE HOOK MARK +1CC00..1CCD5 ; ON # So [214] UP-POINTING GO-KART..LOWER RIGHT QUADRANT STANDING KNIGHT +1CD00..1CEB3 ; ON # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON +1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D245 ; ON # So GREEK MUSICAL LEIMMA +1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D6C1 ; ON # Sm MATHEMATICAL BOLD NABLA +1D6DB ; ON # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; ON # Sm MATHEMATICAL ITALIC NABLA +1D715 ; ON # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; ON # Sm MATHEMATICAL BOLD ITALIC NABLA +1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; ON # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; ON # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; ON # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F10B..1F10C ; ON # No 
[2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F ; ON # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH +1F12F ; ON # So COPYLEFT SYMBOL +1F16A..1F16F ; ON # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F1AD ; ON # So MASK WORK SYMBOL +1F260..1F265 ; ON # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; ON # So [251] CYCLONE..AMPHORA +1F3FB..1F3FF ; ON # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F6D7 ; ON # So [728] RAT..ELEVATOR +1F6DC..1F6EC ; ON # So [17] WIRELESS..AIRPLANE ARRIVING +1F6F0..1F6FC ; ON # So [13] SATELLITE..ROLLER SKATE +1F700..1F776 ; ON # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F7D9 ; ON # So [95] HAUMEA..NINE POINTED WHITE STAR +1F7E0..1F7EB ; ON # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; ON # So HEAVY EQUALS SIGN +1F800..1F80B ; ON # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; ON # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8BB ; ON # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; ON # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH +1FA80..1FA89 ; ON # So [10] YO-YO..HARP +1FA8F..1FAC6 ; ON # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; ON # So [15] MOOSE..ROOT 
VEGETABLE +1FADF..1FAE9 ; ON # So [11] SPLATTER..FACE WITH BAGS UNDER EYES +1FAF0..1FAF8 ; ON # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND +1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE + +# Total code points: 6751 + +# ================================================ + +# Bidi_Class=Boundary_Neutral + +0000..0008 ; BN # Cc [9] .. +000E..001B ; BN # Cc [14] .. +007F..0084 ; BN # Cc [6] .. +0086..009F ; BN # Cc [26] .. +00AD ; BN # Cf SOFT HYPHEN +180E ; BN # Cf MONGOLIAN VOWEL SEPARATOR +200B..200D ; BN # Cf [3] ZERO WIDTH SPACE..ZERO WIDTH JOINER +2060..2064 ; BN # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; BN # Cn +206A..206F ; BN # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +FDD0..FDEF ; BN # Cn [32] .. +FEFF ; BN # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; BN # Cn [9] .. +FFFE..FFFF ; BN # Cn [2] .. +1BCA0..1BCA3 ; BN # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; BN # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1FFFE..1FFFF ; BN # Cn [2] .. +2FFFE..2FFFF ; BN # Cn [2] .. +3FFFE..3FFFF ; BN # Cn [2] .. +4FFFE..4FFFF ; BN # Cn [2] .. +5FFFE..5FFFF ; BN # Cn [2] .. +6FFFE..6FFFF ; BN # Cn [2] .. +7FFFE..7FFFF ; BN # Cn [2] .. +8FFFE..8FFFF ; BN # Cn [2] .. +9FFFE..9FFFF ; BN # Cn [2] .. +AFFFE..AFFFF ; BN # Cn [2] .. +BFFFE..BFFFF ; BN # Cn [2] .. +CFFFE..CFFFF ; BN # Cn [2] .. +DFFFE..E0000 ; BN # Cn [3] .. +E0001 ; BN # Cf LANGUAGE TAG +E0002..E001F ; BN # Cn [30] .. +E0020..E007F ; BN # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; BN # Cn [128] .. +E01F0..E0FFF ; BN # Cn [3600] .. +EFFFE..EFFFF ; BN # Cn [2] .. +FFFFE..FFFFF ; BN # Cn [2] .. +10FFFE..10FFFF; BN # Cn [2] .. 
+ +# Total code points: 4016 + +# ================================================ + +# Bidi_Class=Nonspacing_Mark + +0300..036F ; NSM # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; NSM # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; NSM # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; NSM # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; NSM # Mn HEBREW POINT RAFE +05C1..05C2 ; NSM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN +0610..061A ; NSM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; NSM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; NSM # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; NSM # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; NSM # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; NSM # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; NSM # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; NSM # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; NSM # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; NSM # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; NSM # Mn NKO DANTAYALAN +0816..0819 ; NSM # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; NSM # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0897..089F ; NSM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; NSM # Mn [24] 
ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE +093C ; NSM # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; NSM # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; NSM # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; NSM # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; NSM # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; NSM # Mn BENGALI SIGN CANDRABINDU +09BC ; NSM # Mn BENGALI SIGN NUKTA +09C1..09C4 ; NSM # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; NSM # Mn BENGALI SIGN VIRAMA +09E2..09E3 ; NSM # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; NSM # Mn BENGALI SANDHI MARK +0A01..0A02 ; NSM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; NSM # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; NSM # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; NSM # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; NSM # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; NSM # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; NSM # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; NSM # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; NSM # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; NSM # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; NSM # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; NSM # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; NSM # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; NSM # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; NSM # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; NSM # Mn ORIYA SIGN CANDRABINDU +0B3C ; NSM # Mn ORIYA SIGN NUKTA +0B3F ; NSM # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; NSM # Mn ORIYA SIGN VIRAMA 
+0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; NSM # Mn TAMIL SIGN ANUSVARA +0BC0 ; NSM # Mn TAMIL VOWEL SIGN II +0BCD ; NSM # Mn TAMIL SIGN VIRAMA +0C00 ; NSM # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; NSM # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; NSM # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; NSM # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; NSM # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; NSM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; NSM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; NSM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; NSM # Mn KANNADA SIGN CANDRABINDU +0CBC ; NSM # Mn KANNADA SIGN NUKTA +0CCC..0CCD ; NSM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; NSM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; NSM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; NSM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; NSM # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; NSM # Mn MALAYALAM SIGN VIRAMA +0D62..0D63 ; NSM # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; NSM # Mn SINHALA SIGN CANDRABINDU +0DCA ; NSM # Mn SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; NSM # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; NSM # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; NSM # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; NSM # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; NSM # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; NSM # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; NSM # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; NSM # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN 
+0F18..0F19 ; NSM # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; NSM # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; NSM # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; NSM # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; NSM # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; NSM # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; NSM # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; NSM # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; NSM # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; NSM # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; NSM # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; NSM # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; NSM # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; NSM # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; NSM # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; NSM # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; NSM # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; NSM # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; NSM # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; NSM # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; NSM # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; NSM # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; NSM # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; NSM # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; NSM # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; NSM # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL 
INHERENT AA +17B7..17BD ; NSM # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; NSM # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; NSM # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; NSM # Mn KHMER SIGN ATTHACAN +180B..180D ; NSM # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; NSM # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; NSM # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; NSM # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; NSM # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; NSM # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; NSM # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; NSM # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; NSM # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; NSM # Mn BUGINESE VOWEL SIGN AE +1A56 ; NSM # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; NSM # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; NSM # Mn TAI THAM SIGN SAKOT +1A62 ; NSM # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; NSM # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; NSM # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; NSM # Mn BALINESE SIGN REREKAN +1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; NSM # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; NSM # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; NSM # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; NSM # Mn [2] SUNDANESE 
SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; NSM # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; NSM # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; NSM # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; NSM # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; NSM # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; NSM # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; NSM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; NSM # Mn VEDIC SIGN TIRYAK +1CF4 ; NSM # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; NSM # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; NSM # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; NSM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; NSM # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; NSM # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; NSM # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; NSM # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; NSM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3099..309A ; NSM # Mn [2] COMBINING 
KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; NSM # Mn COMBINING CYRILLIC VZMET +A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; NSM # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; NSM # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; NSM # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; NSM # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; NSM # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; NSM # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; NSM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; NSM # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; NSM # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; NSM # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; NSM # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; NSM # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; NSM # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; NSM # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; NSM # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; NSM # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; NSM # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; NSM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; NSM # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; NSM # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; NSM # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; NSM # Mn TAI VIET MAI KANG +AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; NSM # Mn [2] TAI 
VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; NSM # Mn TAI VIET TONE MAI THO +AAEC..AAED ; NSM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; NSM # Mn MEETEI MAYEK VIRAMA +ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; NSM # Mn MEETEI MAYEK APUN IYEK +FB1E ; NSM # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; NSM # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +101FD ; NSM # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; NSM # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; NSM # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; NSM # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; NSM # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; NSM # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; NSM # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; NSM # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; NSM # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; NSM # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; NSM # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; NSM # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O 
+1107F..11081 ; NSM # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; NSM # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; NSM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; NSM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; NSM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; NSM # Mn MAHAJANI SIGN NUKTA +11180..11181 ; NSM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; NSM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; NSM # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; NSM # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; NSM # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; NSM # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; NSM # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; NSM # Mn KHOJKI SIGN SUKUN +11241 ; NSM # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; NSM # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; NSM # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; NSM # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; NSM # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; NSM # Mn GRANTHA VOWEL SIGN II +11366..1136C ; NSM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; NSM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; NSM # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; NSM # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; NSM # Mn TULU-TIGALARI CONJOINER +113D2 ; NSM # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; NSM # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11438..1143F ; NSM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; NSM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA 
+11446 ; NSM # Mn NEWA SIGN NUKTA +1145E ; NSM # Mn NEWA SANDHI MARK +114B3..114B8 ; NSM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; NSM # Mn TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; NSM # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; NSM # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; NSM # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; NSM # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; NSM # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; NSM # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; NSM # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; NSM # Mn MODI SIGN ANUSVARA +1163F..11640 ; NSM # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; NSM # Mn TAKRI SIGN ANUSVARA +116AD ; NSM # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; NSM # Mn TAKRI SIGN NUKTA +1171D ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; NSM # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; NSM # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; NSM # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; NSM # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; NSM # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; NSM # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; NSM # Mn DIVES AKURU VIRAMA +11943 ; NSM # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; NSM # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; NSM # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; NSM # Mn NANDINAGARI SIGN VIRAMA +11A01..11A06 ; NSM # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A09..11A0A ; NSM # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; NSM # Mn [6] ZANABAZAR 
SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; NSM # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; NSM # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; NSM # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; NSM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; NSM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; NSM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; NSM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; NSM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C92..11CA7 ; NSM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; NSM # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; NSM # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; NSM # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; NSM # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; NSM # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; NSM # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; NSM # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; NSM # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; NSM # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; NSM # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; NSM # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; NSM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; NSM # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; NSM # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; NSM # Mn KAWI VOWEL SIGN EU +11F42 ; NSM # Mn KAWI CONJOINER +11F5A ; NSM # Mn KAWI SIGN NUKTA +13440 ; NSM # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; NSM # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN 
HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; NSM # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; NSM # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; NSM # Mn KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; NSM # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; NSM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; NSM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; NSM # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; NSM # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; NSM # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; NSM # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; NSM # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; NSM # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; NSM # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; NSM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 
+1E01B..1E021 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; NSM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; NSM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; NSM # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; NSM # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; NSM # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; NSM # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; NSM # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2028 + +# ================================================ + +# Bidi_Class=Arabic_Letter + +0608 ; AL # Sm ARABIC RAY +060B ; AL # Sc AFGHANI SIGN +060D ; AL # Po ARABIC DATE SEPARATOR +061B ; AL # Po ARABIC SEMICOLON +061C ; AL # Cf ARABIC LETTER MARK +061D..061F ; AL # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; AL # Lm ARABIC TATWEEL +0641..064A ; AL # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066D ; AL # Po ARABIC FIVE POINTED STAR +066E..066F ; AL # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; AL # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; AL # Po ARABIC FULL STOP +06D5 ; AL # Lo ARABIC LETTER AE +06E5..06E6 ; AL # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; AL # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; AL # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC 
LETTER GHAIN WITH DOT BELOW +06FD..06FE ; AL # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070F ; AL # Cf SYRIAC ABBREVIATION MARK +0710 ; AL # Lo SYRIAC LETTER ALAPH +0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; AL # Lo THAANA LETTER NAA +0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; AL # Sk ARABIC RAISED ROUND DOT +0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; AL # Lm ARABIC SMALL FARSI YEH +FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; AL # Sc RIAL SIGN +FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS 
VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +1EC71..1ECAB ; AL # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; AL # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; AL # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; AL # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; AL # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; AL # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; AL # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; AL # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; AL 
# Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# The above property value applies to 293 code points not listed here. 
+# Total code points: 1767 + +# ================================================ + +# Bidi_Class=Left_To_Right_Override + +202D ; LRO # Cf LEFT-TO-RIGHT OVERRIDE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Override + +202E ; RLO # Cf RIGHT-TO-LEFT OVERRIDE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Left_To_Right_Embedding + +202A ; LRE # Cf LEFT-TO-RIGHT EMBEDDING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Embedding + +202B ; RLE # Cf RIGHT-TO-LEFT EMBEDDING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Pop_Directional_Format + +202C ; PDF # Cf POP DIRECTIONAL FORMATTING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Left_To_Right_Isolate + +2066 ; LRI # Cf LEFT-TO-RIGHT ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Isolate + +2067 ; RLI # Cf RIGHT-TO-LEFT ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=First_Strong_Isolate + +2068 ; FSI # Cf FIRST STRONG ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Pop_Directional_Isolate + +2069 ; PDI # Cf POP DIRECTIONAL ISOLATE + +# Total code points: 1 + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/DerivedCoreProperties.txt b/3rd/pcre2/maint/Unicode.tables/DerivedCoreProperties.txt new file mode 100644 index 00000000..1075638f --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/DerivedCoreProperties.txt @@ -0,0 +1,13362 @@ +# DerivedCoreProperties-16.0.0.txt +# Date: 2024-05-31, 18:09:32 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Derived Property: Math +# Generated from: Sm + Other_Math + +002B ; Math # Sm PLUS SIGN +003C..003E ; Math # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +005E ; Math # Sk CIRCUMFLEX ACCENT +007C ; Math # Sm VERTICAL LINE +007E ; Math # Sm TILDE +00AC ; Math # Sm NOT SIGN +00B1 ; Math # Sm PLUS-MINUS SIGN +00D7 ; Math # Sm MULTIPLICATION SIGN +00F7 ; Math # Sm DIVISION SIGN +03D0..03D2 ; Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F6 ; Math # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +0606..0608 ; Math # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +2016 ; Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Math # Pc CHARACTER TIE +2044 ; Math # Sm FRACTION SLASH +2052 ; Math # Sm COMMERCIAL MINUS SIGN +2061..2064 ; Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207A..207C ; Math # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208A..208C ; Math # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Math # L& EULER CONSTANT +210A..2113 
; Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Math # L& DOUBLE-STRUCK CAPITAL N +2118 ; Math # Sm SCRIPT CAPITAL P +2119..211D ; Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Math # L& BLACK-LETTER CAPITAL Z +2129 ; Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Math # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214B ; Math # Sm TURNED AMPERSAND +2190..2194 ; Math # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Math # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Math # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Math # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Math # Sm RIGHTWARDS ARROW FROM BAR +21A7 ; Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21AE ; Math # Sm LEFT RIGHT ARROW WITH STROKE +21B0..21B1 ; Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Math # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH 
STROKE +21D0..21D1 ; Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Math # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Math # So DOWNWARDS DOUBLE ARROW +21D4 ; Math # Sm LEFT RIGHT DOUBLE ARROW +21D5..21DB ; Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +21F4..22FF ; Math # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2308 ; Math # Ps LEFT CEILING +2309 ; Math # Pe RIGHT CEILING +230A ; Math # Ps LEFT FLOOR +230B ; Math # Pe RIGHT FLOOR +2320..2321 ; Math # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +237C ; Math # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +239B..23B3 ; Math # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23B5 ; Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Math # So RADICAL SYMBOL BOTTOM +23D0 ; Math # So VERTICAL LINE EXTENSION +23DC..23E1 ; Math # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2 ; Math # So WHITE TRAPEZIUM +25A0..25A1 ; Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Math # Sm WHITE RIGHT-POINTING TRIANGLE +25BC..25C0 ; Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Math # Sm WHITE LEFT-POINTING TRIANGLE +25C6..25C7 ; Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +25F8..25FF ; Math # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2605..2606 ; Math # So [2] BLACK STAR..WHITE STAR +2640 ; Math # So FEMALE SIGN +2642 ; Math # So MALE SIGN +2660..2663 ; Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT 
+266D..266E ; Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +266F ; Math # Sm MUSIC SHARP SIGN +27C0..27C4 ; Math # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Math # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Math # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Math # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; Math # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Math # Ps LEFT WHITE PARENTHESIS +2986 ; Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Math # Pe RIGHT 
ARC GREATER-THAN BRACKET +2995 ; Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Math # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Math # Ps LEFT WIGGLY FENCE +29D9 ; Math # Pe RIGHT WIGGLY FENCE +29DA ; Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Math # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Math # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Math # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B30..2B44 ; Math # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B47..2B4C ; Math # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +FB29 ; Math # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FE61 ; Math # Po SMALL ASTERISK +FE62 ; Math # Sm SMALL PLUS SIGN +FE63 ; Math # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Math # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Math # Po SMALL REVERSE SOLIDUS +FF0B ; Math # Sm FULLWIDTH PLUS SIGN +FF1C..FF1E ; Math # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF3C ; Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF5C ; Math # Sm FULLWIDTH VERTICAL LINE +FF5E ; Math # Sm FULLWIDTH TILDE +FFE2 ; Math # Sm FULLWIDTH NOT SIGN +FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +10D8E..10D8F ; Math # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN +1D400..1D454 ; Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Math # L& [2] MATHEMATICAL 
SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Math # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Math # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Math # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Math # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL 
OMEGA +1D735 ; Math # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Math # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Math # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Math # Lo ARABIC MATHEMATICAL TAILED YEH 
+1EE4B ; Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + +# Total code points: 2312 + +# ================================================ + +# Derived Property: Alphabetic +# Generated from: Uppercase + Lowercase + Lt + Lm + Lo + 
Nl + Other_Alphabetic + +0041..005A ; Alphabetic # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Alphabetic # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Alphabetic # Lo FEMININE ORDINAL INDICATOR +00B5 ; Alphabetic # L& MICRO SIGN +00BA ; Alphabetic # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Alphabetic # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Alphabetic # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Alphabetic # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Alphabetic # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Alphabetic # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Alphabetic # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Alphabetic # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Alphabetic # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; Alphabetic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; Alphabetic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; Alphabetic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; Alphabetic # Lm MODIFIER LETTER VOICING +02EE ; Alphabetic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0345 ; Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +0363..036F ; Alphabetic # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X +0370..0373 ; Alphabetic # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; Alphabetic # Lm GREEK NUMERAL SIGN +0376..0377 ; Alphabetic # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Alphabetic # Lm 
GREEK YPOGEGRAMMENI +037B..037D ; Alphabetic # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Alphabetic # L& GREEK CAPITAL LETTER YOT +0386 ; Alphabetic # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Alphabetic # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Alphabetic # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Alphabetic # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; Alphabetic # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; Alphabetic # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; Alphabetic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Alphabetic # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Alphabetic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; Alphabetic # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +05B0..05BD ; Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Alphabetic # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; Alphabetic # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Alphabetic # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0610..061A ; Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +0620..063F ; Alphabetic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; Alphabetic # Lm ARABIC TATWEEL +0641..064A ; Alphabetic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..0657 ; Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Alphabetic # Mn 
[7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +066E..066F ; Alphabetic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; Alphabetic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; Alphabetic # Lo ARABIC LETTER AE +06D6..06DC ; Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Alphabetic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Alphabetic # Mn ARABIC SMALL LOW MEEM +06EE..06EF ; Alphabetic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; Alphabetic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; Alphabetic # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; Alphabetic # Lo SYRIAC LETTER ALAPH +0711 ; Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; Alphabetic # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..073F ; Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +074D..07A5 ; Alphabetic # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07A6..07B0 ; Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; Alphabetic # Lo THAANA LETTER NAA +07CA..07EA ; Alphabetic # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; Alphabetic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; Alphabetic # Lm NKO LAJANYALAN +0800..0815 ; Alphabetic # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0817 ; Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081A ; Alphabetic # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER 
SHORT A +0825..0827 ; Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER I +0829..082C ; Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0897 ; Alphabetic # Mn ARABIC PEPET +08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH +08D4..08DF ; Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA +08E3..08E9 ; Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Alphabetic # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Alphabetic # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A ; Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093D ; Alphabetic # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; Alphabetic # Lo DEVANAGARI OM +0955..0957 ; Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; Alphabetic # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 
+0971 ; Alphabetic # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; Alphabetic # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0981 ; Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Alphabetic # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Alphabetic # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Alphabetic # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Alphabetic # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Alphabetic # Lo BENGALI LETTER LA +09B6..09B9 ; Alphabetic # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; Alphabetic # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; Alphabetic # Lo BENGALI LETTER KHANDA TA +09D7 ; Alphabetic # Mc BENGALI AU LENGTH MARK +09DC..09DD ; Alphabetic # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Alphabetic # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09F0..09F1 ; Alphabetic # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; Alphabetic # Lo BENGALI LETTER VEDIC ANUSVARA +0A01..0A02 ; Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Alphabetic # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Alphabetic # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Alphabetic # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Alphabetic # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Alphabetic # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Alphabetic # Lo [2] GURMUKHI 
LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Alphabetic # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Alphabetic # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Alphabetic # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; Alphabetic # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Alphabetic # Lo GURMUKHI LETTER FA +0A70..0A71 ; Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; Alphabetic # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Alphabetic # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Alphabetic # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Alphabetic # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Alphabetic # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Alphabetic # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Alphabetic # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Alphabetic # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; Alphabetic # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; Alphabetic # Lo GUJARATI OM +0AE0..0AE1 ; Alphabetic # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; Alphabetic # Mn 
[2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AF9 ; Alphabetic # Lo GUJARATI LETTER ZHA +0AFA..0AFC ; Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0B01 ; Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Alphabetic # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Alphabetic # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Alphabetic # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Alphabetic # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Alphabetic # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Alphabetic # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; Alphabetic # Lo ORIYA SIGN AVAGRAHA +0B3E ; Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Alphabetic # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; Alphabetic # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Alphabetic # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B71 ; Alphabetic # Lo ORIYA LETTER WA +0B82 ; Alphabetic # Mn TAMIL SIGN ANUSVARA +0B83 ; Alphabetic # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Alphabetic # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Alphabetic # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Alphabetic # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Alphabetic # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Alphabetic # Lo TAMIL LETTER JA +0B9E..0B9F ; Alphabetic # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Alphabetic # Lo [2] TAMIL LETTER 
NNA..TAMIL LETTER TA +0BA8..0BAA ; Alphabetic # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Alphabetic # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; Alphabetic # Lo TAMIL OM +0BD7 ; Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; Alphabetic # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; Alphabetic # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Alphabetic # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Alphabetic # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; Alphabetic # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; Alphabetic # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; 
Alphabetic # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Alphabetic # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Alphabetic # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Alphabetic # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Alphabetic # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; Alphabetic # Lo KANNADA SIGN AVAGRAHA +0CBE ; Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; Alphabetic # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01 ; Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; Alphabetic # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Alphabetic # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; Alphabetic # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; Alphabetic # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Alphabetic # Mc [3] 
MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; Alphabetic # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; Alphabetic # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D5F..0D61 ; Alphabetic # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D7A..0D7F ; Alphabetic # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; Alphabetic # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Alphabetic # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Alphabetic # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Alphabetic # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Alphabetic # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Alphabetic # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCF..0DD1 ; Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E01..0E30 ; Alphabetic # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; Alphabetic # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; Alphabetic # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Alphabetic # 
Lm THAI CHARACTER MAIYAMOK +0E4D ; Alphabetic # Mn THAI CHARACTER NIKHAHIT +0E81..0E82 ; Alphabetic # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Alphabetic # Lo LAO LETTER KHO TAM +0E86..0E8A ; Alphabetic # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; Alphabetic # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; Alphabetic # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; Alphabetic # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; Alphabetic # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EB9 ; Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0EBD ; Alphabetic # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Alphabetic # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Alphabetic # Lm LAO KO LA +0ECD ; Alphabetic # Mn LAO NIGGAHITA +0EDC..0EDF ; Alphabetic # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; Alphabetic # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; Alphabetic # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Alphabetic # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F83 ; Alphabetic # Mn [4] TIBETAN VOWEL SIGN REVERSED I..TIBETAN SIGN SNA LDAN +0F88..0F8C ; Alphabetic # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +1000..102A ; Alphabetic # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Alphabetic # Mn [5] MYANMAR VOWEL 
SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; Alphabetic # Lo MYANMAR LETTER GREAT SA +1050..1055 ; Alphabetic # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; Alphabetic # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; Alphabetic # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Alphabetic # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Alphabetic # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; Alphabetic # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Alphabetic # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; Alphabetic # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Alphabetic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Alphabetic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; Alphabetic # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Alphabetic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Alphabetic # 
Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +10A0..10C5 ; Alphabetic # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Alphabetic # L& GEORGIAN CAPITAL LETTER YN +10CD ; Alphabetic # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Alphabetic # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Alphabetic # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Alphabetic # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; Alphabetic # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Alphabetic # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Alphabetic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Alphabetic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Alphabetic # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Alphabetic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Alphabetic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Alphabetic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; Alphabetic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; Alphabetic # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Alphabetic # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; 
Alphabetic # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; Alphabetic # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; Alphabetic # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; Alphabetic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; Alphabetic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; Alphabetic # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; Alphabetic # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1713 ; Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +171F..1731 ; Alphabetic # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1732..1733 ; Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1740..1751 ; Alphabetic # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; Alphabetic # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Alphabetic # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; Alphabetic # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D7 ; Alphabetic # Lm KHMER SIGN LEK TOO +17DC ; Alphabetic # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; Alphabetic # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Alphabetic # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; Alphabetic # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; Alphabetic # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; 
Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; Alphabetic # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; Alphabetic # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; Alphabetic # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; Alphabetic # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1950..196D ; Alphabetic # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Alphabetic # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; Alphabetic # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; Alphabetic # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; Alphabetic # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A20..1A54 ; Alphabetic # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Alphabetic # Mc TAI 
THAM VOWEL SIGN A +1A62 ; Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1AA7 ; Alphabetic # Lm TAI THAM SIGN MAI YAMOK +1ABF..1AC0 ; Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW +1ACC..1ACE ; Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Alphabetic # Mc BALINESE SIGN BISAH +1B05..1B33 ; Alphabetic # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B35 ; Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B45..1B4C ; Alphabetic # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B80..1B81 ; Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Alphabetic # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN 
PANEULEUNG +1BAC..1BAD ; Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; Alphabetic # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; Alphabetic # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C00..1C23 ; Alphabetic # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36 ; Alphabetic # Mn LEPCHA SIGN RAN +1C4D..1C4F ; Alphabetic # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; Alphabetic # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Alphabetic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C8A ; Alphabetic # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; Alphabetic # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Alphabetic # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; Alphabetic # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Alphabetic # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; Alphabetic # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; Alphabetic # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL 
EL +1D2C..1D6A ; Alphabetic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Alphabetic # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DD3..1DF4 ; Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1E00..1F15 ; Alphabetic # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Alphabetic # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Alphabetic # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Alphabetic # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Alphabetic # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Alphabetic # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Alphabetic # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Alphabetic # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Alphabetic # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Alphabetic # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Alphabetic # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Alphabetic # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Alphabetic # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; 
Alphabetic # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Alphabetic # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Alphabetic # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; Alphabetic # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Alphabetic # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Alphabetic # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Alphabetic # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL C +2107 ; Alphabetic # L& EULER CONSTANT +210A..2113 ; Alphabetic # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Alphabetic # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL Z +2126 ; Alphabetic # L& OHM SIGN +2128 ; Alphabetic # L& BLACK-LETTER CAPITAL Z +212A..212D ; Alphabetic # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; Alphabetic # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; Alphabetic # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; Alphabetic # L& INFORMATION SOURCE +213C..213F ; Alphabetic # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Alphabetic # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Alphabetic # L& TURNED SMALL F +2160..2182 ; Alphabetic # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Alphabetic # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Alphabetic 
# Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +24B6..24E9 ; Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C7B ; Alphabetic # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Alphabetic # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; Alphabetic # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; Alphabetic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Alphabetic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Alphabetic # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Alphabetic # L& GEORGIAN SMALL LETTER YN +2D2D ; Alphabetic # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Alphabetic # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Alphabetic # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; Alphabetic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E2F ; Alphabetic # Lm VERTICAL TILDE +3005 ; Alphabetic # Lm IDEOGRAPHIC ITERATION MARK +3006 ; Alphabetic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Alphabetic # Nl 
IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Alphabetic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3031..3035 ; Alphabetic # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; Alphabetic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Alphabetic # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; Alphabetic # Lo MASU MARK +3041..3096 ; Alphabetic # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; Alphabetic # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Alphabetic # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; Alphabetic # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; Alphabetic # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; Alphabetic # Lo KATAKANA DIGRAPH KOTO +3105..312F ; Alphabetic # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; Alphabetic # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; Alphabetic # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; Alphabetic # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; Alphabetic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; Alphabetic # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; Alphabetic # Lm YI SYLLABLE WU +A016..A48C ; Alphabetic # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; Alphabetic # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Alphabetic # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; Alphabetic # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Alphabetic # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; Alphabetic # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; Alphabetic # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; Alphabetic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Alphabetic # Lo CYRILLIC LETTER MULTIOCULAR O 
+A674..A67B ; Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A67F ; Alphabetic # Lm CYRILLIC PAYEROK +A680..A69B ; Alphabetic # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Alphabetic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; Alphabetic # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Alphabetic # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A717..A71F ; Alphabetic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; Alphabetic # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Alphabetic # Lm MODIFIER LETTER US +A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; Alphabetic # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; Alphabetic # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Alphabetic # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; Alphabetic # Lo 
[7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A802 ; Alphabetic # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; Alphabetic # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; Alphabetic # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; Alphabetic # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; Alphabetic # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A840..A873 ; Alphabetic # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Alphabetic # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C5 ; Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU +A8F2..A8F7 ; Alphabetic # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; Alphabetic # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; Alphabetic # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; Alphabetic # Mn DEVANAGARI VOWEL SIGN AY +A90A..A925 ; Alphabetic # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92A ; Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A930..A946 ; Alphabetic # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Alphabetic # Mc REJANG CONSONANT SIGN H +A960..A97C ; Alphabetic # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Alphabetic # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Alphabetic # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; Alphabetic # Mc [2] JAVANESE VOWEL SIGN 
TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Alphabetic # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9BF ; Alphabetic # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +A9CF ; Alphabetic # Lm JAVANESE PANGRANGKEP +A9E0..A9E4 ; Alphabetic # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; Alphabetic # Mn MYANMAR SIGN SHAN SAW +A9E6 ; Alphabetic # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; Alphabetic # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; Alphabetic # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; Alphabetic # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; Alphabetic # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; Alphabetic # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AA60..AA6F ; Alphabetic # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Alphabetic # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Alphabetic # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; Alphabetic # Lo MYANMAR LETTER AITON RA +AA7B ; Alphabetic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Alphabetic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; 
Alphabetic # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; Alphabetic # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB0 ; Alphabetic # Mn TAI VIET MAI KANG +AAB1 ; Alphabetic # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; Alphabetic # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; Alphabetic # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE ; Alphabetic # Mn TAI VIET VOWEL AM +AAC0 ; Alphabetic # Lo TAI VIET TONE MAI NUENG +AAC2 ; Alphabetic # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Alphabetic # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Alphabetic # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; Alphabetic # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; Alphabetic # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Alphabetic # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; Alphabetic # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; Alphabetic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Alphabetic # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE 
TILDE +AB69 ; Alphabetic # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; Alphabetic # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; Alphabetic # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +AC00..D7A3 ; Alphabetic # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Alphabetic # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Alphabetic # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; Alphabetic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Alphabetic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; Alphabetic # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Alphabetic # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; Alphabetic # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; Alphabetic # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; Alphabetic # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Alphabetic # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Alphabetic # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Alphabetic # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Alphabetic # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; Alphabetic # Lo [108] HEBREW LETTER TSADI WITH 
DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; Alphabetic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; Alphabetic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Alphabetic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; Alphabetic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; Alphabetic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Alphabetic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; Alphabetic # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Alphabetic # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; Alphabetic # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; Alphabetic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; Alphabetic # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; Alphabetic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; Alphabetic # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Alphabetic # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Alphabetic # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Alphabetic # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; Alphabetic # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Alphabetic # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO 
+10028..1003A ; Alphabetic # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Alphabetic # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Alphabetic # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; Alphabetic # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Alphabetic # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; Alphabetic # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; Alphabetic # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; Alphabetic # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; Alphabetic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; Alphabetic # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; Alphabetic # Nl GOTHIC LETTER NINETY +10342..10349 ; Alphabetic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Alphabetic # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; Alphabetic # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; Alphabetic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; Alphabetic # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Alphabetic # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; Alphabetic # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; Alphabetic # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; Alphabetic # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; Alphabetic # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Alphabetic # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; Alphabetic # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; Alphabetic # Lo [52] 
CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; Alphabetic # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Alphabetic # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Alphabetic # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Alphabetic # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Alphabetic # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Alphabetic # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Alphabetic # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Alphabetic # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; Alphabetic # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; Alphabetic # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; Alphabetic # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; Alphabetic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Alphabetic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Alphabetic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; Alphabetic # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Alphabetic # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Alphabetic # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Alphabetic # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Alphabetic # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; Alphabetic # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; Alphabetic # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; Alphabetic # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; Alphabetic 
# Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; Alphabetic # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; Alphabetic # Lo KHAROSHTHI LETTER A +10A01..10A03 ; Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; Alphabetic # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Alphabetic # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; Alphabetic # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; Alphabetic # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; Alphabetic # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; Alphabetic # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; Alphabetic # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; Alphabetic # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; Alphabetic # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; Alphabetic # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; Alphabetic # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; Alphabetic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; Alphabetic # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Alphabetic # L& [51] 
OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; Alphabetic # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4A..10D4D ; Alphabetic # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; Alphabetic # Lm GARAY VOWEL LENGTH MARK +10D4F ; Alphabetic # Lo GARAY SUKUN +10D50..10D65 ; Alphabetic # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69 ; Alphabetic # Mn GARAY VOWEL SIGN E +10D6F ; Alphabetic # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; Alphabetic # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10E80..10EA9 ; Alphabetic # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; Alphabetic # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; Alphabetic # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; Alphabetic # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Alphabetic # Mc BRAHMI SIGN VISARGA +11003..11037 ; Alphabetic # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11045 ; Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11071..11072 ; Alphabetic # Lo 
[2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; Alphabetic # Lo BRAHMI LETTER OLD TAMIL LLA +11080..11081 ; Alphabetic # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082 ; Alphabetic # Mc KAITHI SIGN VISARGA +11083..110AF ; Alphabetic # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110C2 ; Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R +110D0..110E8 ; Alphabetic # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11100..11102 ; Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Alphabetic # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11144 ; Alphabetic # Lo CHAKMA LETTER LHAA +11145..11146 ; Alphabetic # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; Alphabetic # Lo CHAKMA LETTER VAA +11150..11172 ; Alphabetic # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; Alphabetic # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Alphabetic # Mc SHARADA SIGN VISARGA +11183..111B2 ; Alphabetic # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Alphabetic # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; Alphabetic # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111CE ; Alphabetic # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; 
Alphabetic # Mn SHARADA SIGN INVERTED CANDRABINDU +111DA ; Alphabetic # Lo SHARADA EKAM +111DC ; Alphabetic # Lo SHARADA HEADSTROKE +11200..11211 ; Alphabetic # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; Alphabetic # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Alphabetic # Mn KHOJKI SIGN SHADDA +1123E ; Alphabetic # Mn KHOJKI SIGN SUKUN +1123F..11240 ; Alphabetic # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; Alphabetic # Mn KHOJKI VOWEL SIGN VOCALIC R +11280..11286 ; Alphabetic # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; Alphabetic # Lo MULTANI LETTER GHA +1128A..1128D ; Alphabetic # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; Alphabetic # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; Alphabetic # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; Alphabetic # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; Alphabetic # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; Alphabetic # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; Alphabetic # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; Alphabetic # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; Alphabetic # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; Alphabetic # Lo [5] 
GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; Alphabetic # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11350 ; Alphabetic # Lo GRANTHA OM +11357 ; Alphabetic # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; Alphabetic # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; Alphabetic # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Alphabetic # Lo TULU-TIGALARI LETTER EE +1138E ; Alphabetic # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; Alphabetic # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Alphabetic # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113D1 ; Alphabetic # Lo TULU-TIGALARI REPHA +113D3 ; Alphabetic # Lo TULU-TIGALARI SIGN PLUTA +11400..11434 ; Alphabetic # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11443..11444 ; Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA 
SIGN ANUSVARA +11445 ; Alphabetic # Mc NEWA SIGN VISARGA +11447..1144A ; Alphabetic # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; Alphabetic # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; Alphabetic # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Alphabetic # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; Alphabetic # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; Alphabetic # Lo TIRHUTA OM +11580..115AE ; Alphabetic # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Alphabetic # Mc SIDDHAM SIGN VISARGA +115D8..115DB ; Alphabetic # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; Alphabetic # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Alphabetic # Mc MODI SIGN VISARGA +11640 ; Alphabetic # Mn MODI SIGN ARDHACANDRA +11644 ; Alphabetic # Lo MODI SIGN HUVA 
+11680..116AA ; Alphabetic # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B8 ; Alphabetic # Lo TAKRI LETTER ARCHAIC KHA +11700..1171A ; Alphabetic # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D ; Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +11740..11746 ; Alphabetic # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; Alphabetic # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; Alphabetic # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Alphabetic # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; Alphabetic # Mc DOGRA SIGN VISARGA +118A0..118DF ; Alphabetic # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; Alphabetic # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; Alphabetic # Lo DIVES AKURU LETTER O +1190C..11913 ; Alphabetic # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; Alphabetic # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; Alphabetic # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; Alphabetic # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Alphabetic # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; Alphabetic # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193F 
; Alphabetic # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; Alphabetic # Mc DIVES AKURU MEDIAL YA +11941 ; Alphabetic # Lo DIVES AKURU INITIAL RA +11942 ; Alphabetic # Mc DIVES AKURU MEDIAL RA +119A0..119A7 ; Alphabetic # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; Alphabetic # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; Alphabetic # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Alphabetic # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Alphabetic # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Alphabetic # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; Alphabetic # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; Alphabetic # Lo NANDINAGARI HEADSTROKE +119E4 ; Alphabetic # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; Alphabetic # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; Alphabetic # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; Alphabetic # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A35..11A38 ; Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; Alphabetic # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A50 ; Alphabetic # Lo SOYOMBO LETTER A +11A51..11A56 ; Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; Alphabetic # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA 
+11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Alphabetic # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Alphabetic # Mc BHAIKSUKI SIGN VISARGA +11C40 ; Alphabetic # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; Alphabetic # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Alphabetic # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Alphabetic # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Alphabetic # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; Alphabetic # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; Alphabetic # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; Alphabetic # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D46 ; Alphabetic # Lo MASARAM GONDI REPHA +11D47 ; Alphabetic # Mn MASARAM GONDI RA-KARA 
+11D60..11D65 ; Alphabetic # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; Alphabetic # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; Alphabetic # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; Alphabetic # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Alphabetic # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Alphabetic # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Alphabetic # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; Alphabetic # Lo GUNJALA GONDI OM +11EE0..11EF2 ; Alphabetic # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Alphabetic # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; Alphabetic # Lo KAWI SIGN REPHA +11F03 ; Alphabetic # Mc KAWI SIGN VISARGA +11F04..11F10 ; Alphabetic # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; Alphabetic # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; Alphabetic # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Alphabetic # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; Alphabetic # Mn KAWI VOWEL SIGN EU +11FB0 ; Alphabetic # Lo LISU LETTER YHA +12000..12399 ; Alphabetic # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; Alphabetic # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; Alphabetic # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; Alphabetic # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; Alphabetic # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN 
HIEROGLYPH V011D +13441..13446 ; Alphabetic # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; Alphabetic # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; Alphabetic # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Alphabetic # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA +16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; Alphabetic # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; Alphabetic # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; Alphabetic # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; Alphabetic # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; Alphabetic # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; Alphabetic # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; Alphabetic # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; Alphabetic # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; Alphabetic # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; Alphabetic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16E40..16E7F ; Alphabetic # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; Alphabetic # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Alphabetic 
# Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Alphabetic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK +16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; Alphabetic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; Alphabetic # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; Alphabetic # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; Alphabetic # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; Alphabetic # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; Alphabetic # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; Alphabetic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; Alphabetic # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Alphabetic # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Alphabetic # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Alphabetic # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9E ; Alphabetic # Mn DUPLOYAN DOUBLE MARK +1D400..1D454 ; Alphabetic # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Alphabetic # L& [71] MATHEMATICAL ITALIC SMALL 
I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Alphabetic # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Alphabetic # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Alphabetic # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Alphabetic # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Alphabetic # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Alphabetic # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Alphabetic # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Alphabetic # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Alphabetic # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Alphabetic # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Alphabetic # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Alphabetic # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Alphabetic # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Alphabetic # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Alphabetic # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Alphabetic # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Alphabetic # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Alphabetic # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Alphabetic # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Alphabetic # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Alphabetic # L& [25] MATHEMATICAL ITALIC SMALL 
ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Alphabetic # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Alphabetic # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Alphabetic # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Alphabetic # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Alphabetic # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; Alphabetic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; Alphabetic 
# Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E100..1E12C ; Alphabetic # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Alphabetic # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; Alphabetic # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; Alphabetic # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Alphabetic # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; Alphabetic # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; Alphabetic # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; Alphabetic # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; Alphabetic # Lo OL ONAL SIGN HODDOND +1E7E0..1E7E6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; Alphabetic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; Alphabetic # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; Alphabetic # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; Alphabetic # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E947 ; Alphabetic # Mn ADLAM HAMZA +1E94B ; Alphabetic # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Alphabetic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Alphabetic # 
Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Alphabetic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Alphabetic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Alphabetic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Alphabetic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Alphabetic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Alphabetic # 
Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1F130..1F149 ; Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z +20000..2A6DF ; Alphabetic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; Alphabetic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 142759 + +# ================================================ + +# Derived Property: Lowercase +# Generated from: Ll + Other_Lowercase + +0061..007A ; Lowercase # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Lowercase # Lo FEMININE ORDINAL INDICATOR +00B5 ; Lowercase # L& MICRO SIGN +00BA ; Lowercase # Lo MASCULINE ORDINAL INDICATOR +00DF..00F6 ; Lowercase # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Lowercase # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Lowercase # L& LATIN SMALL LETTER A WITH MACRON +0103 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE +0105 ; Lowercase 
# L& LATIN SMALL LETTER A WITH OGONEK +0107 ; Lowercase # L& LATIN SMALL LETTER C WITH ACUTE +0109 ; Lowercase # L& LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Lowercase # L& LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Lowercase # L& LATIN SMALL LETTER C WITH CARON +010F ; Lowercase # L& LATIN SMALL LETTER D WITH CARON +0111 ; Lowercase # L& LATIN SMALL LETTER D WITH STROKE +0113 ; Lowercase # L& LATIN SMALL LETTER E WITH MACRON +0115 ; Lowercase # L& LATIN SMALL LETTER E WITH BREVE +0117 ; Lowercase # L& LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Lowercase # L& LATIN SMALL LETTER E WITH OGONEK +011B ; Lowercase # L& LATIN SMALL LETTER E WITH CARON +011D ; Lowercase # L& LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Lowercase # L& LATIN SMALL LETTER G WITH BREVE +0121 ; Lowercase # L& LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Lowercase # L& LATIN SMALL LETTER G WITH CEDILLA +0125 ; Lowercase # L& LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Lowercase # L& LATIN SMALL LETTER H WITH STROKE +0129 ; Lowercase # L& LATIN SMALL LETTER I WITH TILDE +012B ; Lowercase # L& LATIN SMALL LETTER I WITH MACRON +012D ; Lowercase # L& LATIN SMALL LETTER I WITH BREVE +012F ; Lowercase # L& LATIN SMALL LETTER I WITH OGONEK +0131 ; Lowercase # L& LATIN SMALL LETTER DOTLESS I +0133 ; Lowercase # L& LATIN SMALL LIGATURE IJ +0135 ; Lowercase # L& LATIN SMALL LETTER J WITH CIRCUMFLEX +0137..0138 ; Lowercase # L& [2] LATIN SMALL LETTER K WITH CEDILLA..LATIN SMALL LETTER KRA +013A ; Lowercase # L& LATIN SMALL LETTER L WITH ACUTE +013C ; Lowercase # L& LATIN SMALL LETTER L WITH CEDILLA +013E ; Lowercase # L& LATIN SMALL LETTER L WITH CARON +0140 ; Lowercase # L& LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Lowercase # L& LATIN SMALL LETTER L WITH STROKE +0144 ; Lowercase # L& LATIN SMALL LETTER N WITH ACUTE +0146 ; Lowercase # L& LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Lowercase # L& [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; 
Lowercase # L& LATIN SMALL LETTER ENG +014D ; Lowercase # L& LATIN SMALL LETTER O WITH MACRON +014F ; Lowercase # L& LATIN SMALL LETTER O WITH BREVE +0151 ; Lowercase # L& LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Lowercase # L& LATIN SMALL LIGATURE OE +0155 ; Lowercase # L& LATIN SMALL LETTER R WITH ACUTE +0157 ; Lowercase # L& LATIN SMALL LETTER R WITH CEDILLA +0159 ; Lowercase # L& LATIN SMALL LETTER R WITH CARON +015B ; Lowercase # L& LATIN SMALL LETTER S WITH ACUTE +015D ; Lowercase # L& LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Lowercase # L& LATIN SMALL LETTER S WITH CEDILLA +0161 ; Lowercase # L& LATIN SMALL LETTER S WITH CARON +0163 ; Lowercase # L& LATIN SMALL LETTER T WITH CEDILLA +0165 ; Lowercase # L& LATIN SMALL LETTER T WITH CARON +0167 ; Lowercase # L& LATIN SMALL LETTER T WITH STROKE +0169 ; Lowercase # L& LATIN SMALL LETTER U WITH TILDE +016B ; Lowercase # L& LATIN SMALL LETTER U WITH MACRON +016D ; Lowercase # L& LATIN SMALL LETTER U WITH BREVE +016F ; Lowercase # L& LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Lowercase # L& LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Lowercase # L& LATIN SMALL LETTER U WITH OGONEK +0175 ; Lowercase # L& LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Lowercase # L& LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Lowercase # L& LATIN SMALL LETTER Z WITH ACUTE +017C ; Lowercase # L& LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Lowercase # L& [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Lowercase # L& LATIN SMALL LETTER B WITH TOPBAR +0185 ; Lowercase # L& LATIN SMALL LETTER TONE SIX +0188 ; Lowercase # L& LATIN SMALL LETTER C WITH HOOK +018C..018D ; Lowercase # L& [2] LATIN SMALL LETTER D WITH TOPBAR..LATIN SMALL LETTER TURNED DELTA +0192 ; Lowercase # L& LATIN SMALL LETTER F WITH HOOK +0195 ; Lowercase # L& LATIN SMALL LETTER HV +0199..019B ; Lowercase # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE +019E ; Lowercase # L& LATIN SMALL 
LETTER N WITH LONG RIGHT LEG +01A1 ; Lowercase # L& LATIN SMALL LETTER O WITH HORN +01A3 ; Lowercase # L& LATIN SMALL LETTER OI +01A5 ; Lowercase # L& LATIN SMALL LETTER P WITH HOOK +01A8 ; Lowercase # L& LATIN SMALL LETTER TONE TWO +01AA..01AB ; Lowercase # L& [2] LATIN LETTER REVERSED ESH LOOP..LATIN SMALL LETTER T WITH PALATAL HOOK +01AD ; Lowercase # L& LATIN SMALL LETTER T WITH HOOK +01B0 ; Lowercase # L& LATIN SMALL LETTER U WITH HORN +01B4 ; Lowercase # L& LATIN SMALL LETTER Y WITH HOOK +01B6 ; Lowercase # L& LATIN SMALL LETTER Z WITH STROKE +01B9..01BA ; Lowercase # L& [2] LATIN SMALL LETTER EZH REVERSED..LATIN SMALL LETTER EZH WITH TAIL +01BD..01BF ; Lowercase # L& [3] LATIN SMALL LETTER TONE FIVE..LATIN LETTER WYNN +01C6 ; Lowercase # L& LATIN SMALL LETTER DZ WITH CARON +01C9 ; Lowercase # L& LATIN SMALL LETTER LJ +01CC ; Lowercase # L& LATIN SMALL LETTER NJ +01CE ; Lowercase # L& LATIN SMALL LETTER A WITH CARON +01D0 ; Lowercase # L& LATIN SMALL LETTER I WITH CARON +01D2 ; Lowercase # L& LATIN SMALL LETTER O WITH CARON +01D4 ; Lowercase # L& LATIN SMALL LETTER U WITH CARON +01D6 ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DC..01DD ; Lowercase # L& [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Lowercase # L& LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Lowercase # L& LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Lowercase # L& LATIN SMALL LETTER AE WITH MACRON +01E5 ; Lowercase # L& LATIN SMALL LETTER G WITH STROKE +01E7 ; Lowercase # L& LATIN SMALL LETTER G WITH CARON +01E9 ; Lowercase # L& LATIN SMALL LETTER K WITH CARON +01EB ; Lowercase # L& LATIN SMALL LETTER O WITH OGONEK +01ED ; Lowercase # L& LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F0 ; Lowercase # L& [2] LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH 
CARON +01F3 ; Lowercase # L& LATIN SMALL LETTER DZ +01F5 ; Lowercase # L& LATIN SMALL LETTER G WITH ACUTE +01F9 ; Lowercase # L& LATIN SMALL LETTER N WITH GRAVE +01FB ; Lowercase # L& LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Lowercase # L& LATIN SMALL LETTER AE WITH ACUTE +01FF ; Lowercase # L& LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Lowercase # L& LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Lowercase # L& LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Lowercase # L& LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Lowercase # L& LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Lowercase # L& LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Lowercase # L& LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Lowercase # L& LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Lowercase # L& LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Lowercase # L& LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Lowercase # L& LATIN SMALL LETTER R WITH INVERTED BREVE +0215 ; Lowercase # L& LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Lowercase # L& LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Lowercase # L& LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Lowercase # L& LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Lowercase # L& LATIN SMALL LETTER YOGH +021F ; Lowercase # L& LATIN SMALL LETTER H WITH CARON +0221 ; Lowercase # L& LATIN SMALL LETTER D WITH CURL +0223 ; Lowercase # L& LATIN SMALL LETTER OU +0225 ; Lowercase # L& LATIN SMALL LETTER Z WITH HOOK +0227 ; Lowercase # L& LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Lowercase # L& LATIN SMALL LETTER E WITH CEDILLA +022B ; Lowercase # L& LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; Lowercase # L& LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Lowercase # L& LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Lowercase # L& LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233..0239 ; Lowercase # L& [7] LATIN SMALL LETTER Y WITH MACRON..LATIN SMALL LETTER QP DIGRAPH +023C ; Lowercase # L& LATIN SMALL 
LETTER C WITH STROKE +023F..0240 ; Lowercase # L& [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Lowercase # L& LATIN SMALL LETTER GLOTTAL STOP +0247 ; Lowercase # L& LATIN SMALL LETTER E WITH STROKE +0249 ; Lowercase # L& LATIN SMALL LETTER J WITH STROKE +024B ; Lowercase # L& LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Lowercase # L& LATIN SMALL LETTER R WITH STROKE +024F..0293 ; Lowercase # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL +0295..02AF ; Lowercase # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +0371 ; Lowercase # L& GREEK SMALL LETTER HETA +0373 ; Lowercase # L& GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Lowercase # L& GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Lowercase # Lm GREEK YPOGEGRAMMENI +037B..037D ; Lowercase # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Lowercase # L& GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Lowercase # L& [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Lowercase # L& [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Lowercase # L& [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Lowercase # L& GREEK SMALL LETTER ARCHAIC KOPPA +03DB ; Lowercase # L& GREEK SMALL LETTER STIGMA +03DD ; Lowercase # L& GREEK SMALL LETTER DIGAMMA +03DF ; Lowercase # L& GREEK SMALL LETTER KOPPA +03E1 ; Lowercase # L& GREEK SMALL LETTER SAMPI +03E3 ; Lowercase # L& COPTIC SMALL LETTER SHEI +03E5 ; Lowercase # L& COPTIC SMALL LETTER FEI +03E7 ; Lowercase # L& 
COPTIC SMALL LETTER KHEI +03E9 ; Lowercase # L& COPTIC SMALL LETTER HORI +03EB ; Lowercase # L& COPTIC SMALL LETTER GANGIA +03ED ; Lowercase # L& COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Lowercase # L& [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Lowercase # L& GREEK LUNATE EPSILON SYMBOL +03F8 ; Lowercase # L& GREEK SMALL LETTER SHO +03FB..03FC ; Lowercase # L& [2] GREEK SMALL LETTER SAN..GREEK RHO WITH STROKE SYMBOL +0430..045F ; Lowercase # L& [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Lowercase # L& CYRILLIC SMALL LETTER OMEGA +0463 ; Lowercase # L& CYRILLIC SMALL LETTER YAT +0465 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Lowercase # L& CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Lowercase # L& CYRILLIC SMALL LETTER BIG YUS +046D ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Lowercase # L& CYRILLIC SMALL LETTER KSI +0471 ; Lowercase # L& CYRILLIC SMALL LETTER PSI +0473 ; Lowercase # L& CYRILLIC SMALL LETTER FITA +0475 ; Lowercase # L& CYRILLIC SMALL LETTER IZHITSA +0477 ; Lowercase # L& CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Lowercase # L& CYRILLIC SMALL LETTER UK +047B ; Lowercase # L& CYRILLIC SMALL LETTER ROUND OMEGA +047D ; Lowercase # L& CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Lowercase # L& CYRILLIC SMALL LETTER OT +0481 ; Lowercase # L& CYRILLIC SMALL LETTER KOPPA +048B ; Lowercase # L& CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Lowercase # L& CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Lowercase # L& CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Lowercase # L& CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Lowercase # L& CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Lowercase # L& CYRILLIC SMALL LETTER KA 
WITH DESCENDER +049D ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Lowercase # L& CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Lowercase # L& CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Lowercase # L& CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Lowercase # L& CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Lowercase # L& CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Lowercase # L& CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Lowercase # L& CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Lowercase # L& CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Lowercase # L& CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Lowercase # L& CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Lowercase # L& CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +04BB ; Lowercase # L& CYRILLIC SMALL LETTER SHHA +04BD ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Lowercase # L& CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Lowercase # L& CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CE..04CF ; Lowercase # L& [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Lowercase # L& CYRILLIC SMALL LETTER A WITH BREVE +04D3 ; Lowercase # L& CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Lowercase # L& CYRILLIC SMALL LIGATURE A IE +04D7 ; Lowercase # L& CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Lowercase # L& CYRILLIC SMALL LETTER SCHWA +04DB ; Lowercase # L& CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Lowercase # L& CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; 
Lowercase # L& CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Lowercase # L& CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Lowercase # L& CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Lowercase # L& CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Lowercase # L& CYRILLIC SMALL LETTER BARRED O +04EB ; Lowercase # L& CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Lowercase # L& CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Lowercase # L& CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; Lowercase # L& CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Lowercase # L& CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Lowercase # L& CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Lowercase # L& CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Lowercase # L& CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Lowercase # L& CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI DE +0503 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI DJE +0505 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI LJE +050B ; Lowercase # L& CYRILLIC SMALL LETTER KOMI NJE +050D ; Lowercase # L& CYRILLIC SMALL LETTER KOMI SJE +050F ; Lowercase # L& CYRILLIC SMALL LETTER KOMI TJE +0511 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Lowercase # L& CYRILLIC SMALL LETTER LHA +0517 ; Lowercase # L& CYRILLIC SMALL LETTER RHA +0519 ; Lowercase # L& CYRILLIC SMALL LETTER YAE +051B ; Lowercase # L& CYRILLIC SMALL LETTER QA +051D ; Lowercase # L& CYRILLIC SMALL LETTER WE +051F ; Lowercase # L& CYRILLIC SMALL LETTER ALEUT KA +0521 ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Lowercase # L& CYRILLIC SMALL 
LETTER EN WITH MIDDLE HOOK +0525 ; Lowercase # L& CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Lowercase # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Lowercase # L& CYRILLIC SMALL LETTER DZZHE +052D ; Lowercase # L& CYRILLIC SMALL LETTER DCHE +052F ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH DESCENDER +0560..0588 ; Lowercase # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +10D0..10FA ; Lowercase # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Lowercase # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Lowercase # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13F8..13FD ; Lowercase # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Lowercase # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Lowercase # L& CYRILLIC SMALL LETTER TJE +1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Lowercase # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E01 ; Lowercase # L& LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Lowercase # L& LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Lowercase # L& LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Lowercase # L& LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Lowercase # L& LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Lowercase # L& LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Lowercase # L& LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Lowercase # L& LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Lowercase # L& LATIN SMALL LETTER D WITH 
CEDILLA +1E13 ; Lowercase # L& LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Lowercase # L& LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Lowercase # L& LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Lowercase # L& LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Lowercase # L& LATIN SMALL LETTER E WITH CEDILLA AND BREVE +1E1F ; Lowercase # L& LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Lowercase # L& LATIN SMALL LETTER G WITH MACRON +1E23 ; Lowercase # L& LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Lowercase # L& LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Lowercase # L& LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Lowercase # L& LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Lowercase # L& LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Lowercase # L& LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Lowercase # L& LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE +1E31 ; Lowercase # L& LATIN SMALL LETTER K WITH ACUTE +1E33 ; Lowercase # L& LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Lowercase # L& LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Lowercase # L& LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Lowercase # L& LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Lowercase # L& LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Lowercase # L& LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Lowercase # L& LATIN SMALL LETTER M WITH ACUTE +1E41 ; Lowercase # L& LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Lowercase # L& LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Lowercase # L& LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Lowercase # L& LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Lowercase # L& LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Lowercase # L& LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Lowercase # L& LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Lowercase # L& LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Lowercase # L& LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; 
Lowercase # L& LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Lowercase # L& LATIN SMALL LETTER P WITH ACUTE +1E57 ; Lowercase # L& LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Lowercase # L& LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Lowercase # L& LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Lowercase # L& LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Lowercase # L& LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Lowercase # L& LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Lowercase # L& LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; Lowercase # L& LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Lowercase # L& LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Lowercase # L& LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Lowercase # L& LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Lowercase # L& LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Lowercase # L& LATIN SMALL LETTER T WITH LINE BELOW +1E71 ; Lowercase # L& LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Lowercase # L& LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Lowercase # L& LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Lowercase # L& LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Lowercase # L& LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Lowercase # L& LATIN SMALL LETTER V WITH TILDE +1E7F ; Lowercase # L& LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Lowercase # L& LATIN SMALL LETTER W WITH GRAVE +1E83 ; Lowercase # L& LATIN SMALL LETTER W WITH ACUTE +1E85 ; Lowercase # L& LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Lowercase # L& LATIN SMALL LETTER W WITH DOT ABOVE +1E89 ; Lowercase # L& LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Lowercase # L& LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Lowercase # L& LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Lowercase # L& LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Lowercase # L& LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Lowercase # L& LATIN SMALL LETTER 
Z WITH DOT BELOW +1E95..1E9D ; Lowercase # L& [9] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH HIGH STROKE +1E9F ; Lowercase # L& LATIN SMALL LETTER DELTA +1EA1 ; Lowercase # L& LATIN SMALL LETTER A WITH DOT BELOW +1EA3 ; Lowercase # L& LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Lowercase # L& LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Lowercase # L& LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Lowercase # L& LATIN SMALL LETTER E WITH TILDE +1EBF ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC3 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Lowercase # L& LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Lowercase # L& LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Lowercase # L& LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Lowercase # L& LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED5 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Lowercase # L& 
LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Lowercase # L& LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Lowercase # L& LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Lowercase # L& LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Lowercase # L& LATIN SMALL LETTER Y WITH DOT BELOW +1EF7 ; Lowercase # L& LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Lowercase # L& LATIN SMALL LETTER Y WITH TILDE +1EFB ; Lowercase # L& LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Lowercase # L& LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Lowercase # L& [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Lowercase # L& [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Lowercase # L& [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Lowercase # L& [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Lowercase # L& [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Lowercase # L& [8] GREEK 
SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Lowercase # L& [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1F87 ; Lowercase # L& [8] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F90..1F97 ; Lowercase # L& [8] GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA0..1FA7 ; Lowercase # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FB0..1FB4 ; Lowercase # L& [5] GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Lowercase # L& [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBE ; Lowercase # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Lowercase # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Lowercase # L& [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FD0..1FD3 ; Lowercase # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Lowercase # L& [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; Lowercase # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Lowercase # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +2071 ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N 
+2090..209C ; Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +210A ; Lowercase # L& SCRIPT SMALL G +210E..210F ; Lowercase # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI +2113 ; Lowercase # L& SCRIPT SMALL L +212F ; Lowercase # L& SCRIPT SMALL E +2134 ; Lowercase # L& SCRIPT SMALL O +2139 ; Lowercase # L& INFORMATION SOURCE +213C..213D ; Lowercase # L& [2] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK SMALL GAMMA +2146..2149 ; Lowercase # L& [4] DOUBLE-STRUCK ITALIC SMALL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Lowercase # L& TURNED SMALL F +2170..217F ; Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2184 ; Lowercase # L& LATIN SMALL LETTER REVERSED C +24D0..24E9 ; Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C30..2C5F ; Lowercase # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Lowercase # L& LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Lowercase # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Lowercase # L& LATIN SMALL LETTER H WITH DESCENDER +2C6A ; Lowercase # L& LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Lowercase # L& LATIN SMALL LETTER Z WITH DESCENDER +2C71 ; Lowercase # L& LATIN SMALL LETTER V WITH RIGHT HOOK +2C73..2C74 ; Lowercase # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL +2C76..2C7B ; Lowercase # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C81 ; Lowercase # L& COPTIC SMALL LETTER ALFA +2C83 ; Lowercase # L& COPTIC SMALL LETTER VIDA +2C85 ; Lowercase # L& COPTIC SMALL LETTER GAMMA +2C87 ; Lowercase # L& COPTIC SMALL LETTER DALDA +2C89 ; Lowercase # L& COPTIC SMALL LETTER EIE +2C8B ; Lowercase # L& COPTIC SMALL LETTER SOU +2C8D ; Lowercase # L& COPTIC SMALL LETTER ZATA +2C8F ; Lowercase # L& COPTIC SMALL LETTER HATE 
+2C91 ; Lowercase # L& COPTIC SMALL LETTER THETHE +2C93 ; Lowercase # L& COPTIC SMALL LETTER IAUDA +2C95 ; Lowercase # L& COPTIC SMALL LETTER KAPA +2C97 ; Lowercase # L& COPTIC SMALL LETTER LAULA +2C99 ; Lowercase # L& COPTIC SMALL LETTER MI +2C9B ; Lowercase # L& COPTIC SMALL LETTER NI +2C9D ; Lowercase # L& COPTIC SMALL LETTER KSI +2C9F ; Lowercase # L& COPTIC SMALL LETTER O +2CA1 ; Lowercase # L& COPTIC SMALL LETTER PI +2CA3 ; Lowercase # L& COPTIC SMALL LETTER RO +2CA5 ; Lowercase # L& COPTIC SMALL LETTER SIMA +2CA7 ; Lowercase # L& COPTIC SMALL LETTER TAU +2CA9 ; Lowercase # L& COPTIC SMALL LETTER UA +2CAB ; Lowercase # L& COPTIC SMALL LETTER FI +2CAD ; Lowercase # L& COPTIC SMALL LETTER KHI +2CAF ; Lowercase # L& COPTIC SMALL LETTER PSI +2CB1 ; Lowercase # L& COPTIC SMALL LETTER OOU +2CB3 ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB9 ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Lowercase # L& COPTIC SMALL LETTER SAMPI +2CC3 ; Lowercase # L& COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Lowercase # L& COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; Lowercase # L& COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Lowercase # L& COPTIC SMALL LETTER OLD 
COPTIC SHIMA +2CDD ; Lowercase # L& COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Lowercase # L& COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Lowercase # L& COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3..2CE4 ; Lowercase # L& [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI +2CEC ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lowercase # L& COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Lowercase # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lowercase # L& GEORGIAN SMALL LETTER YN +2D2D ; Lowercase # L& GEORGIAN SMALL LETTER AEN +A641 ; Lowercase # L& CYRILLIC SMALL LETTER ZEMLYA +A643 ; Lowercase # L& CYRILLIC SMALL LETTER DZELO +A645 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Lowercase # L& CYRILLIC SMALL LETTER IOTA +A649 ; Lowercase # L& CYRILLIC SMALL LETTER DJERV +A64B ; Lowercase # L& CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Lowercase # L& CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Lowercase # L& CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Lowercase # L& CYRILLIC SMALL LETTER YERU WITH BACK YER +A653 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED YU +A657 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED A +A659 ; Lowercase # L& CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Lowercase # L& CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Lowercase # L& CYRILLIC SMALL LETTER YN +A661 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Lowercase # L& CYRILLIC SMALL LETTER SOFT DE +A665 ; Lowercase # L& CYRILLIC SMALL LETTER SOFT EL +A667 ; Lowercase # L& CYRILLIC SMALL LETTER SOFT EM +A669 ; Lowercase # L& CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Lowercase # L& CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Lowercase # L& CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Lowercase # L& CYRILLIC SMALL LETTER DWE 
+A683 ; Lowercase # L& CYRILLIC SMALL LETTER DZWE +A685 ; Lowercase # L& CYRILLIC SMALL LETTER ZHWE +A687 ; Lowercase # L& CYRILLIC SMALL LETTER CCHE +A689 ; Lowercase # L& CYRILLIC SMALL LETTER DZZE +A68B ; Lowercase # L& CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Lowercase # L& CYRILLIC SMALL LETTER TWE +A68F ; Lowercase # L& CYRILLIC SMALL LETTER TSWE +A691 ; Lowercase # L& CYRILLIC SMALL LETTER TSSE +A693 ; Lowercase # L& CYRILLIC SMALL LETTER TCHE +A695 ; Lowercase # L& CYRILLIC SMALL LETTER HWE +A697 ; Lowercase # L& CYRILLIC SMALL LETTER SHWE +A699 ; Lowercase # L& CYRILLIC SMALL LETTER DOUBLE O +A69B ; Lowercase # L& CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A723 ; Lowercase # L& LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Lowercase # L& LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Lowercase # L& LATIN SMALL LETTER HENG +A729 ; Lowercase # L& LATIN SMALL LETTER TZ +A72B ; Lowercase # L& LATIN SMALL LETTER TRESILLO +A72D ; Lowercase # L& LATIN SMALL LETTER CUATRILLO +A72F..A731 ; Lowercase # L& [3] LATIN SMALL LETTER CUATRILLO WITH COMMA..LATIN LETTER SMALL CAPITAL S +A733 ; Lowercase # L& LATIN SMALL LETTER AA +A735 ; Lowercase # L& LATIN SMALL LETTER AO +A737 ; Lowercase # L& LATIN SMALL LETTER AU +A739 ; Lowercase # L& LATIN SMALL LETTER AV +A73B ; Lowercase # L& LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Lowercase # L& LATIN SMALL LETTER AY +A73F ; Lowercase # L& LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Lowercase # L& LATIN SMALL LETTER K WITH STROKE +A743 ; Lowercase # L& LATIN SMALL LETTER K WITH DIAGONAL STROKE +A745 ; Lowercase # L& LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Lowercase # L& LATIN SMALL LETTER BROKEN L +A749 ; Lowercase # L& LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Lowercase # L& LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; Lowercase # L& LATIN SMALL LETTER O WITH LOOP +A74F ; Lowercase # 
L& LATIN SMALL LETTER OO +A751 ; Lowercase # L& LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Lowercase # L& LATIN SMALL LETTER P WITH FLOURISH +A755 ; Lowercase # L& LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Lowercase # L& LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Lowercase # L& LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Lowercase # L& LATIN SMALL LETTER R ROTUNDA +A75D ; Lowercase # L& LATIN SMALL LETTER RUM ROTUNDA +A75F ; Lowercase # L& LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Lowercase # L& LATIN SMALL LETTER VY +A763 ; Lowercase # L& LATIN SMALL LETTER VISIGOTHIC Z +A765 ; Lowercase # L& LATIN SMALL LETTER THORN WITH STROKE +A767 ; Lowercase # L& LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Lowercase # L& LATIN SMALL LETTER VEND +A76B ; Lowercase # L& LATIN SMALL LETTER ET +A76D ; Lowercase # L& LATIN SMALL LETTER IS +A76F ; Lowercase # L& LATIN SMALL LETTER CON +A770 ; Lowercase # Lm MODIFIER LETTER US +A771..A778 ; Lowercase # L& [8] LATIN SMALL LETTER DUM..LATIN SMALL LETTER UM +A77A ; Lowercase # L& LATIN SMALL LETTER INSULAR D +A77C ; Lowercase # L& LATIN SMALL LETTER INSULAR F +A77F ; Lowercase # L& LATIN SMALL LETTER TURNED INSULAR G +A781 ; Lowercase # L& LATIN SMALL LETTER TURNED L +A783 ; Lowercase # L& LATIN SMALL LETTER INSULAR R +A785 ; Lowercase # L& LATIN SMALL LETTER INSULAR S +A787 ; Lowercase # L& LATIN SMALL LETTER INSULAR T +A78C ; Lowercase # L& LATIN SMALL LETTER SALTILLO +A78E ; Lowercase # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A791 ; Lowercase # L& LATIN SMALL LETTER N WITH DESCENDER +A793..A795 ; Lowercase # L& [3] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER H WITH PALATAL HOOK +A797 ; Lowercase # L& LATIN SMALL LETTER B WITH FLOURISH +A799 ; Lowercase # L& LATIN SMALL LETTER F WITH STROKE +A79B ; Lowercase # L& LATIN SMALL LETTER VOLAPUK AE +A79D ; Lowercase # L& LATIN SMALL LETTER VOLAPUK OE +A79F ; Lowercase # L& LATIN SMALL LETTER VOLAPUK 
UE +A7A1 ; Lowercase # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Lowercase # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE +A7A5 ; Lowercase # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Lowercase # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Lowercase # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AF ; Lowercase # L& LATIN LETTER SMALL CAPITAL Q +A7B5 ; Lowercase # L& LATIN SMALL LETTER BETA +A7B7 ; Lowercase # L& LATIN SMALL LETTER OMEGA +A7B9 ; Lowercase # L& LATIN SMALL LETTER U WITH STROKE +A7BB ; Lowercase # L& LATIN SMALL LETTER GLOTTAL A +A7BD ; Lowercase # L& LATIN SMALL LETTER GLOTTAL I +A7BF ; Lowercase # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Lowercase # L& LATIN SMALL LETTER OLD POLISH O +A7C3 ; Lowercase # L& LATIN SMALL LETTER ANGLICANA W +A7C8 ; Lowercase # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Lowercase # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Lowercase # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D1 ; Lowercase # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN +A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Lowercase # L& LATIN SMALL LETTER LAMBDA +A7F2..A7F4 ; Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H +A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M +AB30..AB5A ; Lowercase # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Lowercase # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Lowercase # Lm MODIFIER LETTER SMALL 
TURNED W +AB70..ABBF ; Lowercase # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Lowercase # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Lowercase # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Lowercase # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Lowercase # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Lowercase # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Lowercase # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Lowercase # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Lowercase # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10780 ; Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10CC0..10CF2 ; Lowercase # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Lowercase # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1D41A..1D433 ; Lowercase # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z +1D44E..1D454 ; Lowercase # L& [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G +1D456..1D467 ; Lowercase # L& [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z +1D482..1D49B ; Lowercase # L& [26] MATHEMATICAL BOLD ITALIC SMALL A..MATHEMATICAL BOLD ITALIC SMALL Z 
+1D4B6..1D4B9 ; Lowercase # L& [4] MATHEMATICAL SCRIPT SMALL A..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Lowercase # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Lowercase # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D4CF ; Lowercase # L& [11] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL SCRIPT SMALL Z +1D4EA..1D503 ; Lowercase # L& [26] MATHEMATICAL BOLD SCRIPT SMALL A..MATHEMATICAL BOLD SCRIPT SMALL Z +1D51E..1D537 ; Lowercase # L& [26] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL FRAKTUR SMALL Z +1D552..1D56B ; Lowercase # L& [26] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL DOUBLE-STRUCK SMALL Z +1D586..1D59F ; Lowercase # L& [26] MATHEMATICAL BOLD FRAKTUR SMALL A..MATHEMATICAL BOLD FRAKTUR SMALL Z +1D5BA..1D5D3 ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF SMALL A..MATHEMATICAL SANS-SERIF SMALL Z +1D5EE..1D607 ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF BOLD SMALL A..MATHEMATICAL SANS-SERIF BOLD SMALL Z +1D622..1D63B ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF ITALIC SMALL A..MATHEMATICAL SANS-SERIF ITALIC SMALL Z +1D656..1D66F ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL A..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Z +1D68A..1D6A5 ; Lowercase # L& [28] MATHEMATICAL MONOSPACE SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6C2..1D6DA ; Lowercase # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6E1 ; Lowercase # L& [6] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL BOLD PI SYMBOL +1D6FC..1D714 ; Lowercase # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D71B ; Lowercase # L& [6] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL ITALIC PI SYMBOL +1D736..1D74E ; Lowercase # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D755 ; Lowercase # L& [6] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC PI SYMBOL +1D770..1D788 ; Lowercase # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL 
ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D78F ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD PI SYMBOL +1D7AA..1D7C2 ; Lowercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7C9 ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL +1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 2569 + +# ================================================ + +# Derived Property: Uppercase +# Generated from: Lu + Other_Uppercase + +0041..005A ; Uppercase # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00C0..00D6 ; Uppercase # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DE ; Uppercase # L& [7] LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN +0100 ; Uppercase # L& LATIN CAPITAL LETTER A WITH MACRON +0102 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE +0104 ; Uppercase # L& LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Uppercase # L& LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Uppercase # L& LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Uppercase # L& LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Uppercase # L& LATIN CAPITAL LETTER C WITH CARON +010E ; Uppercase # L& LATIN CAPITAL LETTER D WITH CARON +0110 ; Uppercase # L& 
LATIN CAPITAL LETTER D WITH STROKE +0112 ; Uppercase # L& LATIN CAPITAL LETTER E WITH MACRON +0114 ; Uppercase # L& LATIN CAPITAL LETTER E WITH BREVE +0116 ; Uppercase # L& LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Uppercase # L& LATIN CAPITAL LETTER E WITH OGONEK +011A ; Uppercase # L& LATIN CAPITAL LETTER E WITH CARON +011C ; Uppercase # L& LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Uppercase # L& LATIN CAPITAL LETTER G WITH BREVE +0120 ; Uppercase # L& LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Uppercase # L& LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Uppercase # L& LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Uppercase # L& LATIN CAPITAL LETTER H WITH STROKE +0128 ; Uppercase # L& LATIN CAPITAL LETTER I WITH TILDE +012A ; Uppercase # L& LATIN CAPITAL LETTER I WITH MACRON +012C ; Uppercase # L& LATIN CAPITAL LETTER I WITH BREVE +012E ; Uppercase # L& LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Uppercase # L& LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Uppercase # L& LATIN CAPITAL LIGATURE IJ +0134 ; Uppercase # L& LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Uppercase # L& LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Uppercase # L& LATIN CAPITAL LETTER L WITH ACUTE +013B ; Uppercase # L& LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Uppercase # L& LATIN CAPITAL LETTER L WITH CARON +013F ; Uppercase # L& LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Uppercase # L& LATIN CAPITAL LETTER L WITH STROKE +0143 ; Uppercase # L& LATIN CAPITAL LETTER N WITH ACUTE +0145 ; Uppercase # L& LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Uppercase # L& LATIN CAPITAL LETTER N WITH CARON +014A ; Uppercase # L& LATIN CAPITAL LETTER ENG +014C ; Uppercase # L& LATIN CAPITAL LETTER O WITH MACRON +014E ; Uppercase # L& LATIN CAPITAL LETTER O WITH BREVE +0150 ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Uppercase # L& LATIN CAPITAL LIGATURE OE +0154 ; Uppercase # L& LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Uppercase # L& LATIN CAPITAL LETTER R WITH 
CEDILLA +0158 ; Uppercase # L& LATIN CAPITAL LETTER R WITH CARON +015A ; Uppercase # L& LATIN CAPITAL LETTER S WITH ACUTE +015C ; Uppercase # L& LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Uppercase # L& LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Uppercase # L& LATIN CAPITAL LETTER S WITH CARON +0162 ; Uppercase # L& LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Uppercase # L& LATIN CAPITAL LETTER T WITH CARON +0166 ; Uppercase # L& LATIN CAPITAL LETTER T WITH STROKE +0168 ; Uppercase # L& LATIN CAPITAL LETTER U WITH TILDE +016A ; Uppercase # L& LATIN CAPITAL LETTER U WITH MACRON +016C ; Uppercase # L& LATIN CAPITAL LETTER U WITH BREVE +016E ; Uppercase # L& LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Uppercase # L& LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Uppercase # L& LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Uppercase # L& [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Uppercase # L& LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Uppercase # L& LATIN CAPITAL LETTER Z WITH CARON +0181..0182 ; Uppercase # L& [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Uppercase # L& LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Uppercase # L& [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Uppercase # L& [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Uppercase # L& [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Uppercase # L& [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Uppercase # L& [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK +019C..019D ; Uppercase # L& [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; Uppercase # L& [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN 
CAPITAL LETTER O WITH HORN +01A2 ; Uppercase # L& LATIN CAPITAL LETTER OI +01A4 ; Uppercase # L& LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Uppercase # L& [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Uppercase # L& LATIN CAPITAL LETTER ESH +01AC ; Uppercase # L& LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Uppercase # L& [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Uppercase # L& [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Uppercase # L& [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Uppercase # L& LATIN CAPITAL LETTER TONE FIVE +01C4 ; Uppercase # L& LATIN CAPITAL LETTER DZ WITH CARON +01C7 ; Uppercase # L& LATIN CAPITAL LETTER LJ +01CA ; Uppercase # L& LATIN CAPITAL LETTER NJ +01CD ; Uppercase # L& LATIN CAPITAL LETTER A WITH CARON +01CF ; Uppercase # L& LATIN CAPITAL LETTER I WITH CARON +01D1 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CARON +01D3 ; Uppercase # L& LATIN CAPITAL LETTER U WITH CARON +01D5 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Uppercase # L& LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Uppercase # L& LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Uppercase # L& LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Uppercase # L& LATIN CAPITAL LETTER G WITH CARON +01E8 ; Uppercase # L& LATIN CAPITAL LETTER K WITH CARON +01EA ; Uppercase # L& LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Uppercase # L& LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Uppercase # L& LATIN CAPITAL LETTER EZH WITH CARON +01F1 ; Uppercase # L& LATIN CAPITAL LETTER DZ 
+01F4 ; Uppercase # L& LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Uppercase # L& [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Uppercase # L& LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Uppercase # L& LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Uppercase # L& LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Uppercase # L& LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Uppercase # L& LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Uppercase # L& LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Uppercase # L& LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Uppercase # L& LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Uppercase # L& LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216 ; Uppercase # L& LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Uppercase # L& LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Uppercase # L& LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Uppercase # L& LATIN CAPITAL LETTER YOGH +021E ; Uppercase # L& LATIN CAPITAL LETTER H WITH CARON +0220 ; Uppercase # L& LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Uppercase # L& LATIN CAPITAL LETTER OU +0224 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Uppercase # L& LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Uppercase # L& LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; 
Uppercase # L& [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Uppercase # L& [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Uppercase # L& [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Uppercase # L& LATIN CAPITAL LETTER J WITH STROKE +024A ; Uppercase # L& LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Uppercase # L& LATIN CAPITAL LETTER R WITH STROKE +024E ; Uppercase # L& LATIN CAPITAL LETTER Y WITH STROKE +0370 ; Uppercase # L& GREEK CAPITAL LETTER HETA +0372 ; Uppercase # L& GREEK CAPITAL LETTER ARCHAIC SAMPI +0376 ; Uppercase # L& GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Uppercase # L& GREEK CAPITAL LETTER YOT +0386 ; Uppercase # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Uppercase # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Uppercase # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Uppercase # L& [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Uppercase # L& [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Uppercase # L& [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03CF ; Uppercase # L& GREEK CAPITAL KAI SYMBOL +03D2..03D4 ; Uppercase # L& [3] GREEK UPSILON WITH HOOK SYMBOL..GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL +03D8 ; Uppercase # L& GREEK LETTER ARCHAIC KOPPA +03DA ; Uppercase # L& GREEK LETTER STIGMA +03DC ; Uppercase # L& GREEK LETTER DIGAMMA +03DE ; Uppercase # L& GREEK LETTER KOPPA +03E0 ; Uppercase # L& GREEK LETTER SAMPI +03E2 ; Uppercase # L& COPTIC CAPITAL LETTER SHEI +03E4 ; Uppercase # L& COPTIC CAPITAL LETTER FEI +03E6 ; Uppercase # L& COPTIC CAPITAL LETTER KHEI +03E8 ; Uppercase # L& COPTIC CAPITAL LETTER HORI +03EA ; Uppercase # L& COPTIC CAPITAL LETTER GANGIA +03EC ; 
Uppercase # L& COPTIC CAPITAL LETTER SHIMA +03EE ; Uppercase # L& COPTIC CAPITAL LETTER DEI +03F4 ; Uppercase # L& GREEK CAPITAL THETA SYMBOL +03F7 ; Uppercase # L& GREEK CAPITAL LETTER SHO +03F9..03FA ; Uppercase # L& [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Uppercase # L& [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Uppercase # L& CYRILLIC CAPITAL LETTER OMEGA +0462 ; Uppercase # L& CYRILLIC CAPITAL LETTER YAT +0464 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Uppercase # L& CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Uppercase # L& CYRILLIC CAPITAL LETTER BIG YUS +046C ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Uppercase # L& CYRILLIC CAPITAL LETTER KSI +0470 ; Uppercase # L& CYRILLIC CAPITAL LETTER PSI +0472 ; Uppercase # L& CYRILLIC CAPITAL LETTER FITA +0474 ; Uppercase # L& CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Uppercase # L& CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Uppercase # L& CYRILLIC CAPITAL LETTER UK +047A ; Uppercase # L& CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Uppercase # L& CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Uppercase # L& CYRILLIC CAPITAL LETTER OT +0480 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOPPA +048A ; Uppercase # L& CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Uppercase # L& CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Uppercase # L& CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH VERTICAL 
STROKE +049E ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0 ; Uppercase # L& CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Uppercase # L& CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Uppercase # L& CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Uppercase # L& CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Uppercase # L& CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Uppercase # L& CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Uppercase # L& CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Uppercase # L& CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Uppercase # L& CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Uppercase # L& CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Uppercase # L& CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA +04BC ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Uppercase # L& [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Uppercase # L& CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Uppercase # L& CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Uppercase # L& CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Uppercase # L& CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Uppercase # L& CYRILLIC CAPITAL LIGATURE A IE +04D6 ; Uppercase # L& CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Uppercase # L& CYRILLIC CAPITAL LETTER SCHWA +04DA ; Uppercase # L& CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Uppercase # L& CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Uppercase # L& CYRILLIC 
CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Uppercase # L& CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Uppercase # L& CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Uppercase # L& CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Uppercase # L& CYRILLIC CAPITAL LETTER BARRED O +04EA ; Uppercase # L& CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Uppercase # L& CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Uppercase # L& CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Uppercase # L& CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Uppercase # L& CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Uppercase # L& CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Uppercase # L& CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Uppercase # L& CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE ; Uppercase # L& CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI LJE +050A ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Uppercase # L& CYRILLIC CAPITAL LETTER LHA +0516 ; Uppercase # L& CYRILLIC CAPITAL LETTER RHA +0518 ; Uppercase # L& CYRILLIC CAPITAL LETTER YAE +051A ; Uppercase # L& CYRILLIC CAPITAL LETTER QA +051C ; Uppercase # L& CYRILLIC CAPITAL LETTER WE +051E ; Uppercase # L& CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH MIDDLE 
HOOK +0522 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Uppercase # L& CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Uppercase # L& CYRILLIC CAPITAL LETTER DZZHE +052C ; Uppercase # L& CYRILLIC CAPITAL LETTER DCHE +052E ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Uppercase # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +10A0..10C5 ; Uppercase # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN +10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN +13A0..13F5 ; Uppercase # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Uppercase # L& CYRILLIC CAPITAL LETTER TJE +1C90..1CBA ; Uppercase # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Uppercase # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Uppercase # L& LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Uppercase # L& LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Uppercase # L& LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Uppercase # L& LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Uppercase # L& LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Uppercase # L& LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Uppercase # L& LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Uppercase # L& LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Uppercase # L& LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Uppercase # L& LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Uppercase # L& LATIN CAPITAL 
LETTER E WITH CEDILLA AND BREVE +1E1E ; Uppercase # L& LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Uppercase # L& LATIN CAPITAL LETTER G WITH MACRON +1E22 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Uppercase # L& LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Uppercase # L& LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Uppercase # L& LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Uppercase # L& LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; Uppercase # L& LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Uppercase # L& LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Uppercase # L& LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Uppercase # L& LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Uppercase # L& LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Uppercase # L& LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Uppercase # L& LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Uppercase # L& LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Uppercase # L& LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Uppercase # L& LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Uppercase # L& LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Uppercase # L& LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Uppercase # L& LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Uppercase # L& LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Uppercase # L& LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Uppercase # L& LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Uppercase # L& LATIN CAPITAL LETTER P WITH ACUTE +1E56 ; Uppercase # L& LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Uppercase # 
L& LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Uppercase # L& LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Uppercase # L& LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Uppercase # L& LATIN CAPITAL LETTER S WITH DOT BELOW +1E64 ; Uppercase # L& LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Uppercase # L& LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Uppercase # L& LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Uppercase # L& LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Uppercase # L& LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Uppercase # L& LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Uppercase # L& LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Uppercase # L& LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Uppercase # L& LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Uppercase # L& LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Uppercase # L& LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Uppercase # L& LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Uppercase # L& LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Uppercase # L& LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Uppercase # L& LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Uppercase # L& LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Uppercase # L& LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Uppercase # L& LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A ; Uppercase # L& LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Uppercase # L& LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Uppercase # L& LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9E ; Uppercase # L& LATIN CAPITAL LETTER SHARP S +1EA0 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Uppercase # L& LATIN CAPITAL LETTER A WITH HOOK ABOVE 
+1EA4 ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Uppercase # L& LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA ; Uppercase # L& LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Uppercase # L& LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Uppercase # L& LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Uppercase # L& LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Uppercase # L& LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN 
AND GRAVE +1EDE ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Uppercase # L& LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Uppercase # L& LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Uppercase # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Uppercase # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Uppercase # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Uppercase # L& [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Uppercase # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 
+1F68..1F6F ; Uppercase # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1FB8..1FBB ; Uppercase # L& [4] GREEK CAPITAL LETTER ALPHA WITH VRACHY..GREEK CAPITAL LETTER ALPHA WITH OXIA +1FC8..1FCB ; Uppercase # L& [4] GREEK CAPITAL LETTER EPSILON WITH VARIA..GREEK CAPITAL LETTER ETA WITH OXIA +1FD8..1FDB ; Uppercase # L& [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE8..1FEC ; Uppercase # L& [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF8..1FFB ; Uppercase # L& [4] GREEK CAPITAL LETTER OMICRON WITH VARIA..GREEK CAPITAL LETTER OMEGA WITH OXIA +2102 ; Uppercase # L& DOUBLE-STRUCK CAPITAL C +2107 ; Uppercase # L& EULER CONSTANT +210B..210D ; Uppercase # L& [3] SCRIPT CAPITAL H..DOUBLE-STRUCK CAPITAL H +2110..2112 ; Uppercase # L& [3] SCRIPT CAPITAL I..SCRIPT CAPITAL L +2115 ; Uppercase # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Uppercase # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Uppercase # L& DOUBLE-STRUCK CAPITAL Z +2126 ; Uppercase # L& OHM SIGN +2128 ; Uppercase # L& BLACK-LETTER CAPITAL Z +212A..212D ; Uppercase # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +2130..2133 ; Uppercase # L& [4] SCRIPT CAPITAL E..SCRIPT CAPITAL M +213E..213F ; Uppercase # L& [2] DOUBLE-STRUCK CAPITAL GAMMA..DOUBLE-STRUCK CAPITAL PI +2145 ; Uppercase # L& DOUBLE-STRUCK ITALIC CAPITAL D +2160..216F ; Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +2183 ; Uppercase # L& ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24CF ; Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +2C00..2C2F ; Uppercase # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Uppercase # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Uppercase # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Uppercase # L& LATIN CAPITAL LETTER H WITH 
DESCENDER +2C69 ; Uppercase # L& LATIN CAPITAL LETTER K WITH DESCENDER +2C6B ; Uppercase # L& LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Uppercase # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Uppercase # L& LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Uppercase # L& LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Uppercase # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Uppercase # L& COPTIC CAPITAL LETTER VIDA +2C84 ; Uppercase # L& COPTIC CAPITAL LETTER GAMMA +2C86 ; Uppercase # L& COPTIC CAPITAL LETTER DALDA +2C88 ; Uppercase # L& COPTIC CAPITAL LETTER EIE +2C8A ; Uppercase # L& COPTIC CAPITAL LETTER SOU +2C8C ; Uppercase # L& COPTIC CAPITAL LETTER ZATA +2C8E ; Uppercase # L& COPTIC CAPITAL LETTER HATE +2C90 ; Uppercase # L& COPTIC CAPITAL LETTER THETHE +2C92 ; Uppercase # L& COPTIC CAPITAL LETTER IAUDA +2C94 ; Uppercase # L& COPTIC CAPITAL LETTER KAPA +2C96 ; Uppercase # L& COPTIC CAPITAL LETTER LAULA +2C98 ; Uppercase # L& COPTIC CAPITAL LETTER MI +2C9A ; Uppercase # L& COPTIC CAPITAL LETTER NI +2C9C ; Uppercase # L& COPTIC CAPITAL LETTER KSI +2C9E ; Uppercase # L& COPTIC CAPITAL LETTER O +2CA0 ; Uppercase # L& COPTIC CAPITAL LETTER PI +2CA2 ; Uppercase # L& COPTIC CAPITAL LETTER RO +2CA4 ; Uppercase # L& COPTIC CAPITAL LETTER SIMA +2CA6 ; Uppercase # L& COPTIC CAPITAL LETTER TAU +2CA8 ; Uppercase # L& COPTIC CAPITAL LETTER UA +2CAA ; Uppercase # L& COPTIC CAPITAL LETTER FI +2CAC ; Uppercase # L& COPTIC CAPITAL LETTER KHI +2CAE ; Uppercase # L& COPTIC CAPITAL LETTER PSI +2CB0 ; Uppercase # L& COPTIC CAPITAL LETTER OOU +2CB2 ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE ; Uppercase 
# L& COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Uppercase # L& COPTIC CAPITAL LETTER SAMPI +2CC2 ; Uppercase # L& COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Uppercase # L& COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Uppercase # L& COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Uppercase # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZELO +A644 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTA +A648 ; Uppercase # L& CYRILLIC CAPITAL LETTER DJERV +A64A ; Uppercase # L& CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Uppercase # L& CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Uppercase # L& CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Uppercase # L& CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED YU +A656 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED A 
+A658 ; Uppercase # L& CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Uppercase # L& CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Uppercase # L& CYRILLIC CAPITAL LETTER YN +A660 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Uppercase # L& CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Uppercase # L& CYRILLIC CAPITAL LETTER SOFT EL +A666 ; Uppercase # L& CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Uppercase # L& CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Uppercase # L& CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Uppercase # L& CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Uppercase # L& CYRILLIC CAPITAL LETTER DWE +A682 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZWE +A684 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZHWE +A686 ; Uppercase # L& CYRILLIC CAPITAL LETTER CCHE +A688 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZZE +A68A ; Uppercase # L& CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Uppercase # L& CYRILLIC CAPITAL LETTER TWE +A68E ; Uppercase # L& CYRILLIC CAPITAL LETTER TSWE +A690 ; Uppercase # L& CYRILLIC CAPITAL LETTER TSSE +A692 ; Uppercase # L& CYRILLIC CAPITAL LETTER TCHE +A694 ; Uppercase # L& CYRILLIC CAPITAL LETTER HWE +A696 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHWE +A698 ; Uppercase # L& CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Uppercase # L& CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Uppercase # L& LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Uppercase # L& LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726 ; Uppercase # L& LATIN CAPITAL LETTER HENG +A728 ; Uppercase # L& LATIN CAPITAL LETTER TZ +A72A ; Uppercase # L& LATIN CAPITAL LETTER TRESILLO +A72C ; Uppercase # L& LATIN CAPITAL LETTER CUATRILLO +A72E ; Uppercase # L& LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Uppercase # L& LATIN CAPITAL LETTER AA +A734 ; Uppercase # L& LATIN CAPITAL LETTER AO +A736 ; Uppercase # L& LATIN CAPITAL LETTER AU +A738 ; Uppercase # L& LATIN CAPITAL LETTER AV +A73A ; 
Uppercase # L& LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Uppercase # L& LATIN CAPITAL LETTER AY +A73E ; Uppercase # L& LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Uppercase # L& LATIN CAPITAL LETTER K WITH STROKE +A742 ; Uppercase # L& LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Uppercase # L& LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Uppercase # L& LATIN CAPITAL LETTER BROKEN L +A748 ; Uppercase # L& LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Uppercase # L& LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Uppercase # L& LATIN CAPITAL LETTER O WITH LOOP +A74E ; Uppercase # L& LATIN CAPITAL LETTER OO +A750 ; Uppercase # L& LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Uppercase # L& LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Uppercase # L& LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Uppercase # L& LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Uppercase # L& LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Uppercase # L& LATIN CAPITAL LETTER R ROTUNDA +A75C ; Uppercase # L& LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Uppercase # L& LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Uppercase # L& LATIN CAPITAL LETTER VY +A762 ; Uppercase # L& LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Uppercase # L& LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Uppercase # L& LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Uppercase # L& LATIN CAPITAL LETTER VEND +A76A ; Uppercase # L& LATIN CAPITAL LETTER ET +A76C ; Uppercase # L& LATIN CAPITAL LETTER IS +A76E ; Uppercase # L& LATIN CAPITAL LETTER CON +A779 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR D +A77B ; Uppercase # L& LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Uppercase # L& [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Uppercase # L& LATIN CAPITAL LETTER TURNED L +A782 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR R +A784 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR S +A786 
; Uppercase # L& LATIN CAPITAL LETTER INSULAR T +A78B ; Uppercase # L& LATIN CAPITAL LETTER SALTILLO +A78D ; Uppercase # L& LATIN CAPITAL LETTER TURNED H +A790 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Uppercase # L& LATIN CAPITAL LETTER C WITH BAR +A796 ; Uppercase # L& LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Uppercase # L& LATIN CAPITAL LETTER F WITH STROKE +A79A ; Uppercase # L& LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Uppercase # L& LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Uppercase # L& LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Uppercase # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Uppercase # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Uppercase # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; Uppercase # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Uppercase # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Uppercase # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Uppercase # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Uppercase # L& LATIN CAPITAL LETTER OMEGA +A7B8 ; Uppercase # L& LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL A +A7BC ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Uppercase # L& LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Uppercase # L& LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Uppercase # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Uppercase # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Uppercase # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; 
Uppercase # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F5 ; Uppercase # L& LATIN CAPITAL LETTER REVERSED HALF H +FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Uppercase # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Uppercase # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Uppercase # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Uppercase # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Uppercase # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Uppercase # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Uppercase # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +118A0..118BF ; Uppercase # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Uppercase # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z +1D434..1D44D ; Uppercase # L& [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z +1D468..1D481 ; Uppercase # L& [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z +1D49C ; Uppercase # L& MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Uppercase # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Uppercase # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Uppercase # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Uppercase # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B5 ; Uppercase # L& [8] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT CAPITAL Z +1D4D0..1D4E9 ; Uppercase # L& [26] MATHEMATICAL BOLD SCRIPT CAPITAL 
A..MATHEMATICAL BOLD SCRIPT CAPITAL Z +1D504..1D505 ; Uppercase # L& [2] MATHEMATICAL FRAKTUR CAPITAL A..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Uppercase # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Uppercase # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Uppercase # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D538..1D539 ; Uppercase # L& [2] MATHEMATICAL DOUBLE-STRUCK CAPITAL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Uppercase # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Uppercase # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Uppercase # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Uppercase # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D56C..1D585 ; Uppercase # L& [26] MATHEMATICAL BOLD FRAKTUR CAPITAL A..MATHEMATICAL BOLD FRAKTUR CAPITAL Z +1D5A0..1D5B9 ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF CAPITAL A..MATHEMATICAL SANS-SERIF CAPITAL Z +1D5D4..1D5ED ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF BOLD CAPITAL A..MATHEMATICAL SANS-SERIF BOLD CAPITAL Z +1D608..1D621 ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF ITALIC CAPITAL Z +1D63C..1D655 ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Z +1D670..1D689 ; Uppercase # L& [26] MATHEMATICAL MONOSPACE CAPITAL A..MATHEMATICAL MONOSPACE CAPITAL Z +1D6A8..1D6C0 ; Uppercase # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6E2..1D6FA ; Uppercase # L& [25] MATHEMATICAL ITALIC CAPITAL ALPHA..MATHEMATICAL ITALIC CAPITAL OMEGA +1D71C..1D734 ; Uppercase # L& [25] MATHEMATICAL BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D756..1D76E ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD 
CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D790..1D7A8 ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA +1E900..1E921 ; Uppercase # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA +1F130..1F149 ; Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1978 + +# ================================================ + +# Derived Property: Cased (Cased) +# As defined by Unicode Standard Definition D135 +# C has the Lowercase or Uppercase property or has a General_Category value of Titlecase_Letter. + +0041..005A ; Cased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Cased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Cased # Lo FEMININE ORDINAL INDICATOR +00B5 ; Cased # L& MICRO SIGN +00BA ; Cased # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Cased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Cased # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BC..01BF ; Cased # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C4..0293 ; Cased # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0295..02AF ; Cased # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Cased # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Cased # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 
+02E0..02E4 ; Cased # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Cased # Mn COMBINING GREEK YPOGEGRAMMENI +0370..0373 ; Cased # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; Cased # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Cased # Lm GREEK YPOGEGRAMMENI +037B..037D ; Cased # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Cased # L& GREEK CAPITAL LETTER YOT +0386 ; Cased # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Cased # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Cased # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Cased # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; Cased # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; Cased # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; Cased # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Cased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0560..0588 ; Cased # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +10A0..10C5 ; Cased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Cased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Cased # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Cased # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Cased # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Cased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13A0..13F5 ; Cased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Cased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C8A ; Cased # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; Cased # L& [43] GEORGIAN 
MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Cased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Cased # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Cased # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Cased # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Cased # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Cased # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; Cased # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Cased # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Cased # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Cased # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Cased # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Cased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Cased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Cased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Cased # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Cased # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Cased # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Cased # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Cased # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH 
OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Cased # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Cased # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Cased # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; Cased # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Cased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Cased # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Cased # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; Cased # L& DOUBLE-STRUCK CAPITAL C +2107 ; Cased # L& EULER CONSTANT +210A..2113 ; Cased # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Cased # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Cased # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Cased # L& DOUBLE-STRUCK CAPITAL Z +2126 ; Cased # L& OHM SIGN +2128 ; Cased # L& BLACK-LETTER CAPITAL Z +212A..212D ; Cased # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; Cased # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2139 ; Cased # L& INFORMATION SOURCE +213C..213F ; Cased # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Cased # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Cased # L& TURNED SMALL F +2160..217F ; Cased # Nl [32] ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2183..2184 ; Cased # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +24B6..24E9 ; Cased # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C7B ; Cased # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER 
SMALL CAPITAL TURNED E +2C7C..2C7D ; Cased # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; Cased # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; Cased # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Cased # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Cased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Cased # L& GEORGIAN SMALL LETTER YN +2D2D ; Cased # L& GEORGIAN SMALL LETTER AEN +A640..A66D ; Cased # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A680..A69B ; Cased # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Cased # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Cased # Lm MODIFIER LETTER US +A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A790..A7CD ; Cased # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; Cased # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; Cased # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M +AB30..AB5A ; Cased # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH 
SHORT RIGHT LEG +AB5C..AB5F ; Cased # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Cased # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Cased # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; Cased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; Cased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10400..1044F ; Cased # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +104B0..104D3 ; Cased # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Cased # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10570..1057A ; Cased # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Cased # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Cased # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Cased # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Cased # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Cased # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Cased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Cased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10780 ; Cased # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Cased # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Cased # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Cased # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10C80..10CB2 ; Cased # 
L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Cased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D50..10D65 ; Cased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Cased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +118A0..118DF ; Cased # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E40..16E7F ; Cased # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1D400..1D454 ; Cased # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Cased # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Cased # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Cased # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Cased # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Cased # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Cased # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Cased # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Cased # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Cased # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Cased # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Cased # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Cased # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Cased # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Cased # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Cased # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Cased # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O 
+1D54A..1D550 ; Cased # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Cased # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Cased # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Cased # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Cased # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Cased # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Cased # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Cased # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Cased # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Cased # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1F130..1F149 ; Cased # So 
[26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 4578 + +# ================================================ + +# Derived Property: Case_Ignorable (CI) +# As defined by Unicode Standard Definition D136 +# C is defined to be case-ignorable if +# Word_Break(C) = MidLetter or MidNumLet or Single_Quote, or +# General_Category(C) = Nonspacing_Mark (Mn), Enclosing_Mark (Me), Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). + +0027 ; Case_Ignorable # Po APOSTROPHE +002E ; Case_Ignorable # Po FULL STOP +003A ; Case_Ignorable # Po COLON +005E ; Case_Ignorable # Sk CIRCUMFLEX ACCENT +0060 ; Case_Ignorable # Sk GRAVE ACCENT +00A8 ; Case_Ignorable # Sk DIAERESIS +00AD ; Case_Ignorable # Cf SOFT HYPHEN +00AF ; Case_Ignorable # Sk MACRON +00B4 ; Case_Ignorable # Sk ACUTE ACCENT +00B7 ; Case_Ignorable # Po MIDDLE DOT +00B8 ; Case_Ignorable # Sk CEDILLA +02B0..02C1 ; Case_Ignorable # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Case_Ignorable # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Case_Ignorable # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Case_Ignorable # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Case_Ignorable # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Case_Ignorable # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Case_Ignorable # Lm MODIFIER LETTER VOICING +02ED ; Case_Ignorable # Sk MODIFIER LETTER UNASPIRATED +02EE ; Case_Ignorable # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Case_Ignorable # Sk [17] MODIFIER 
LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..036F ; Case_Ignorable # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0374 ; Case_Ignorable # Lm GREEK NUMERAL SIGN +0375 ; Case_Ignorable # Sk GREEK LOWER NUMERAL SIGN +037A ; Case_Ignorable # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Case_Ignorable # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0387 ; Case_Ignorable # Po GREEK ANO TELEIA +0483..0487 ; Case_Ignorable # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Case_Ignorable # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0559 ; Case_Ignorable # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055F ; Case_Ignorable # Po ARMENIAN ABBREVIATION MARK +0591..05BD ; Case_Ignorable # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Case_Ignorable # Mn HEBREW POINT RAFE +05C1..05C2 ; Case_Ignorable # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Case_Ignorable # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Case_Ignorable # Mn HEBREW POINT QAMATS QATAN +05F4 ; Case_Ignorable # Po HEBREW PUNCTUATION GERSHAYIM +0600..0605 ; Case_Ignorable # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0610..061A ; Case_Ignorable # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061C ; Case_Ignorable # Cf ARABIC LETTER MARK +0640 ; Case_Ignorable # Lm ARABIC TATWEEL +064B..065F ; Case_Ignorable # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Case_Ignorable # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Case_Ignorable # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DD ; Case_Ignorable # Cf ARABIC END OF AYAH +06DF..06E4 ; Case_Ignorable # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Case_Ignorable # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Case_Ignorable # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; 
Case_Ignorable # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +070F ; Case_Ignorable # Cf SYRIAC ABBREVIATION MARK +0711 ; Case_Ignorable # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Case_Ignorable # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Case_Ignorable # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Case_Ignorable # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Case_Ignorable # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; Case_Ignorable # Lm NKO LAJANYALAN +07FD ; Case_Ignorable # Mn NKO DANTAYALAN +0816..0819 ; Case_Ignorable # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Case_Ignorable # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; Case_Ignorable # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; Case_Ignorable # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0888 ; Case_Ignorable # Sk ARABIC RAISED ROUND DOT +0890..0891 ; Case_Ignorable # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +0897..089F ; Case_Ignorable # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08C9 ; Case_Ignorable # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; Case_Ignorable # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E2 ; Case_Ignorable # Cf ARABIC DISPUTED END OF AYAH +08E3..0902 ; Case_Ignorable # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN OE +093C ; Case_Ignorable # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; Case_Ignorable # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Case_Ignorable # Mn DEVANAGARI SIGN VIRAMA 
+0951..0957 ; Case_Ignorable # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Case_Ignorable # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0971 ; Case_Ignorable # Lm DEVANAGARI SIGN HIGH SPACING DOT +0981 ; Case_Ignorable # Mn BENGALI SIGN CANDRABINDU +09BC ; Case_Ignorable # Mn BENGALI SIGN NUKTA +09C1..09C4 ; Case_Ignorable # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Case_Ignorable # Mn BENGALI SIGN VIRAMA +09E2..09E3 ; Case_Ignorable # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Case_Ignorable # Mn BENGALI SANDHI MARK +0A01..0A02 ; Case_Ignorable # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Case_Ignorable # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; Case_Ignorable # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Case_Ignorable # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Case_Ignorable # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Case_Ignorable # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Case_Ignorable # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Case_Ignorable # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Case_Ignorable # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Case_Ignorable # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; Case_Ignorable # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Case_Ignorable # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Case_Ignorable # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Case_Ignorable # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Case_Ignorable # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; Case_Ignorable # Mn ORIYA SIGN CANDRABINDU +0B3C ; Case_Ignorable # Mn ORIYA SIGN NUKTA +0B3F ; Case_Ignorable # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; Case_Ignorable # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; 
Case_Ignorable # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Case_Ignorable # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; Case_Ignorable # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Case_Ignorable # Mn TAMIL SIGN ANUSVARA +0BC0 ; Case_Ignorable # Mn TAMIL VOWEL SIGN II +0BCD ; Case_Ignorable # Mn TAMIL SIGN VIRAMA +0C00 ; Case_Ignorable # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Case_Ignorable # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Case_Ignorable # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Case_Ignorable # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; Case_Ignorable # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Case_Ignorable # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Case_Ignorable # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Case_Ignorable # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Case_Ignorable # Mn KANNADA SIGN CANDRABINDU +0CBC ; Case_Ignorable # Mn KANNADA SIGN NUKTA +0CBF ; Case_Ignorable # Mn KANNADA VOWEL SIGN I +0CC6 ; Case_Ignorable # Mn KANNADA VOWEL SIGN E +0CCC..0CCD ; Case_Ignorable # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; Case_Ignorable # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Case_Ignorable # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Case_Ignorable # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; Case_Ignorable # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Case_Ignorable # Mn MALAYALAM SIGN VIRAMA +0D62..0D63 ; Case_Ignorable # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Case_Ignorable # Mn SINHALA SIGN CANDRABINDU +0DCA ; Case_Ignorable # Mn SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; Case_Ignorable # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 
; Case_Ignorable # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; Case_Ignorable # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Case_Ignorable # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E46 ; Case_Ignorable # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; Case_Ignorable # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Case_Ignorable # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Case_Ignorable # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC6 ; Case_Ignorable # Lm LAO KO LA +0EC8..0ECE ; Case_Ignorable # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; Case_Ignorable # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Case_Ignorable # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Case_Ignorable # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Case_Ignorable # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; Case_Ignorable # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Case_Ignorable # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Case_Ignorable # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Case_Ignorable # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Case_Ignorable # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Case_Ignorable # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Case_Ignorable # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Case_Ignorable # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Case_Ignorable # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Case_Ignorable # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Case_Ignorable # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Case_Ignorable # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Case_Ignorable # Mn [4] MYANMAR 
VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Case_Ignorable # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Case_Ignorable # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Case_Ignorable # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Case_Ignorable # Mn MYANMAR VOWEL SIGN AITON AI +10FC ; Case_Ignorable # Lm MODIFIER LETTER GEORGIAN NAR +135D..135F ; Case_Ignorable # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Case_Ignorable # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; Case_Ignorable # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Case_Ignorable # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Case_Ignorable # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Case_Ignorable # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Case_Ignorable # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Case_Ignorable # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; Case_Ignorable # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D7 ; Case_Ignorable # Lm KHMER SIGN LEK TOO +17DD ; Case_Ignorable # Mn KHMER SIGN ATTHACAN +180B..180D ; Case_Ignorable # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Case_Ignorable # Cf MONGOLIAN VOWEL SEPARATOR +180F ; Case_Ignorable # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1843 ; Case_Ignorable # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1885..1886 ; Case_Ignorable # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Case_Ignorable # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Case_Ignorable # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Case_Ignorable # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Case_Ignorable # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; Case_Ignorable # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN 
SA-I +1A17..1A18 ; Case_Ignorable # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Case_Ignorable # Mn BUGINESE VOWEL SIGN AE +1A56 ; Case_Ignorable # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Case_Ignorable # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Case_Ignorable # Mn TAI THAM SIGN SAKOT +1A62 ; Case_Ignorable # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Case_Ignorable # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Case_Ignorable # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Case_Ignorable # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK +1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN +1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; Case_Ignorable # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; Case_Ignorable # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; Case_Ignorable # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Case_Ignorable # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Case_Ignorable # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Case_Ignorable # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; Case_Ignorable # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Case_Ignorable # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; Case_Ignorable # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Case_Ignorable # Mn BATAK VOWEL SIGN 
KARO O +1BEF..1BF1 ; Case_Ignorable # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; Case_Ignorable # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Case_Ignorable # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Case_Ignorable # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Case_Ignorable # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Case_Ignorable # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Case_Ignorable # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Case_Ignorable # Mn VEDIC SIGN TIRYAK +1CF4 ; Case_Ignorable # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Case_Ignorable # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Case_Ignorable # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Case_Ignorable # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Case_Ignorable # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; Case_Ignorable # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Case_Ignorable # Sk GREEK KORONIS +1FBF..1FC1 ; Case_Ignorable # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Case_Ignorable # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Case_Ignorable # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Case_Ignorable # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Case_Ignorable # Sk [2] GREEK OXIA..GREEK DASIA +200B..200F ; Case_Ignorable # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +2018 ; Case_Ignorable # Pi LEFT SINGLE QUOTATION MARK +2019 ; Case_Ignorable # Pf RIGHT SINGLE QUOTATION MARK +2024 ; Case_Ignorable # Po ONE DOT LEADER +2027 ; Case_Ignorable # Po HYPHENATION POINT +202A..202E ; Case_Ignorable # Cf [5] LEFT-TO-RIGHT 
EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Case_Ignorable # Cf [5] WORD JOINER..INVISIBLE PLUS +2066..206F ; Case_Ignorable # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +2071 ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Case_Ignorable # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20D0..20DC ; Case_Ignorable # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Case_Ignorable # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Case_Ignorable # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Case_Ignorable # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2C7C..2C7D ; Case_Ignorable # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2CEF..2CF1 ; Case_Ignorable # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D6F ; Case_Ignorable # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D7F ; Case_Ignorable # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Case_Ignorable # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E2F ; Case_Ignorable # Lm VERTICAL TILDE +3005 ; Case_Ignorable # Lm IDEOGRAPHIC ITERATION MARK +302A..302D ; Case_Ignorable # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3031..3035 ; Case_Ignorable # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +303B ; Case_Ignorable # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +3099..309A ; Case_Ignorable # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Case_Ignorable # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; Case_Ignorable # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED 
ITERATION MARK +30FC..30FE ; Case_Ignorable # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Case_Ignorable # Lm YI SYLLABLE WU +A4F8..A4FD ; Case_Ignorable # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A60C ; Case_Ignorable # Lm VAI SYLLABLE LENGTHENER +A66F ; Case_Ignorable # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Case_Ignorable # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Case_Ignorable # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67F ; Case_Ignorable # Lm CYRILLIC PAYEROK +A69C..A69D ; Case_Ignorable # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; Case_Ignorable # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Case_Ignorable # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A700..A716 ; Case_Ignorable # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Case_Ignorable # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A770 ; Case_Ignorable # Lm MODIFIER LETTER US +A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F2..A7F4 ; Case_Ignorable # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Case_Ignorable # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Case_Ignorable # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Case_Ignorable # Mn SYLOTI 
NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Case_Ignorable # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Case_Ignorable # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Case_Ignorable # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Case_Ignorable # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; Case_Ignorable # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Case_Ignorable # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Case_Ignorable # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; Case_Ignorable # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9CF ; Case_Ignorable # Lm JAVANESE PANGRANGKEP +A9E5 ; Case_Ignorable # Mn MYANMAR SIGN SHAN SAW +A9E6 ; Case_Ignorable # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA29..AA2E ; Case_Ignorable # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Case_Ignorable # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Case_Ignorable # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Case_Ignorable # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Case_Ignorable # Mn CHAM CONSONANT SIGN FINAL M +AA70 ; Case_Ignorable # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA7C ; Case_Ignorable # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Case_Ignorable # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Case_Ignorable # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Case_Ignorable # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Case_Ignorable # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Case_Ignorable # Mn TAI VIET TONE MAI THO +AADD ; Case_Ignorable # Lm TAI VIET SYMBOL SAM +AAEC..AAED ; Case_Ignorable # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF3..AAF4 ; Case_Ignorable # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION 
MARK +AAF6 ; Case_Ignorable # Mn MEETEI MAYEK VIRAMA +AB5B ; Case_Ignorable # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Case_Ignorable # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Case_Ignorable # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; Case_Ignorable # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +ABE5 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Case_Ignorable # Mn MEETEI MAYEK APUN IYEK +FB1E ; Case_Ignorable # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FBB2..FBC2 ; Case_Ignorable # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FE00..FE0F ; Case_Ignorable # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE13 ; Case_Ignorable # Po PRESENTATION FORM FOR VERTICAL COLON +FE20..FE2F ; Case_Ignorable # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE52 ; Case_Ignorable # Po SMALL FULL STOP +FE55 ; Case_Ignorable # Po SMALL COLON +FEFF ; Case_Ignorable # Cf ZERO WIDTH NO-BREAK SPACE +FF07 ; Case_Ignorable # Po FULLWIDTH APOSTROPHE +FF0E ; Case_Ignorable # Po FULLWIDTH FULL STOP +FF1A ; Case_Ignorable # Po FULLWIDTH COLON +FF3E ; Case_Ignorable # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Case_Ignorable # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Case_Ignorable # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Case_Ignorable # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Case_Ignorable # Sk FULLWIDTH MACRON +FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +101FD ; Case_Ignorable # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Case_Ignorable # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Case_Ignorable # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10780..10785 ; Case_Ignorable # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER 
SMALL B WITH HOOK +10787..107B0 ; Case_Ignorable # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Case_Ignorable # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10A01..10A03 ; Case_Ignorable # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Case_Ignorable # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Case_Ignorable # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Case_Ignorable # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Case_Ignorable # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Case_Ignorable # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Case_Ignorable # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; Case_Ignorable # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK +10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; Case_Ignorable # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Case_Ignorable # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Case_Ignorable # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Case_Ignorable # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Case_Ignorable # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Case_Ignorable # Mn [2] KAITHI SIGN 
VIRAMA..KAITHI SIGN NUKTA +110BD ; Case_Ignorable # Cf KAITHI NUMBER SIGN +110C2 ; Case_Ignorable # Mn KAITHI VOWEL SIGN VOCALIC R +110CD ; Case_Ignorable # Cf KAITHI NUMBER SIGN ABOVE +11100..11102 ; Case_Ignorable # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Case_Ignorable # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Case_Ignorable # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Case_Ignorable # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Case_Ignorable # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Case_Ignorable # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; Case_Ignorable # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Case_Ignorable # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; Case_Ignorable # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Case_Ignorable # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; Case_Ignorable # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Case_Ignorable # Mn KHOJKI SIGN SUKUN +11241 ; Case_Ignorable # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; Case_Ignorable # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Case_Ignorable # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Case_Ignorable # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Case_Ignorable # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; Case_Ignorable # Mn GRANTHA VOWEL SIGN II +11366..1136C ; Case_Ignorable # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Case_Ignorable # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Case_Ignorable # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Case_Ignorable # Mn TULU-TIGALARI SIGN VIRAMA +113D0 ; Case_Ignorable # Mn TULU-TIGALARI CONJOINER +113D2 ; Case_Ignorable # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Case_Ignorable # Mn [2] 
TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11438..1143F ; Case_Ignorable # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Case_Ignorable # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Case_Ignorable # Mn NEWA SIGN NUKTA +1145E ; Case_Ignorable # Mn NEWA SANDHI MARK +114B3..114B8 ; Case_Ignorable # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Case_Ignorable # Mn TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; Case_Ignorable # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Case_Ignorable # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; Case_Ignorable # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Case_Ignorable # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Case_Ignorable # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Case_Ignorable # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Case_Ignorable # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Case_Ignorable # Mn MODI SIGN ANUSVARA +1163F..11640 ; Case_Ignorable # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Case_Ignorable # Mn TAKRI SIGN ANUSVARA +116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA +1171D ; Case_Ignorable # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Case_Ignorable # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Case_Ignorable # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Case_Ignorable # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Case_Ignorable # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Case_Ignorable # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; Case_Ignorable # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; Case_Ignorable # Mn DIVES AKURU 
VIRAMA +11943 ; Case_Ignorable # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; Case_Ignorable # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Case_Ignorable # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Case_Ignorable # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Case_Ignorable # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Case_Ignorable # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Case_Ignorable # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Case_Ignorable # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Case_Ignorable # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Case_Ignorable # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Case_Ignorable # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Case_Ignorable # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Case_Ignorable # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Case_Ignorable # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Case_Ignorable # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Case_Ignorable # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Case_Ignorable # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Case_Ignorable # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Case_Ignorable # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Case_Ignorable # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Case_Ignorable # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Case_Ignorable # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Case_Ignorable # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA 
+11D47 ; Case_Ignorable # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; Case_Ignorable # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Case_Ignorable # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; Case_Ignorable # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Case_Ignorable # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; Case_Ignorable # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Case_Ignorable # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Case_Ignorable # Mn KAWI VOWEL SIGN EU +11F42 ; Case_Ignorable # Mn KAWI CONJOINER +11F5A ; Case_Ignorable # Mn KAWI SIGN NUKTA +13430..1343F ; Case_Ignorable # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13440 ; Case_Ignorable # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Case_Ignorable # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Case_Ignorable # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Case_Ignorable # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; Case_Ignorable # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Case_Ignorable # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16D40..16D42 ; Case_Ignorable # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D6B..16D6C ; Case_Ignorable # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16F4F ; Case_Ignorable # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Case_Ignorable # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Case_Ignorable # Lm OLD CHINESE ITERATION MARK +16FE4 ; 
Case_Ignorable # Mn KHITAN SMALL SCRIPT FILLER +1AFF0..1AFF3 ; Case_Ignorable # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Case_Ignorable # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Case_Ignorable # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1BC9D..1BC9E ; Case_Ignorable # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BCA0..1BCA3 ; Case_Ignorable # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF00..1CF2D ; Case_Ignorable # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Case_Ignorable # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; Case_Ignorable # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D173..1D17A ; Case_Ignorable # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D17B..1D182 ; Case_Ignorable # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Case_Ignorable # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Case_Ignorable # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Case_Ignorable # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Case_Ignorable # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Case_Ignorable # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Case_Ignorable # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 
+1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Case_Ignorable # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Case_Ignorable # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; Case_Ignorable # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; Case_Ignorable # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; Case_Ignorable # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; Case_Ignorable # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E2AE ; Case_Ignorable # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Case_Ignorable # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EB ; Case_Ignorable # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; Case_Ignorable # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Case_Ignorable # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; Case_Ignorable # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Case_Ignorable # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; Case_Ignorable # Lm ADLAM NASALIZATION MARK +1F3FB..1F3FF ; Case_Ignorable # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +E0001 ; Case_Ignorable # Cf LANGUAGE TAG +E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2749 + +# ================================================ + +# Derived Property: Changes_When_Lowercased 
(CWL) +# Characters whose normalized forms are not stable under a toLowercase mapping. +# For more information, see D139 in Section 3.13, "Default Case Algorithms". +# Changes_When_Lowercased(X) is true when toLowercase(toNFD(X)) != toNFD(X) + +0041..005A ; Changes_When_Lowercased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00C0..00D6 ; Changes_When_Lowercased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DE ; Changes_When_Lowercased # L& [7] LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN +0100 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH MACRON +0102 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE +0104 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH CARON +010E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH CARON +0110 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH STROKE +0112 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH MACRON +0114 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH BREVE +0116 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH OGONEK +011A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CARON +011C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH BREVE +0120 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Changes_When_Lowercased # L& LATIN 
CAPITAL LETTER H WITH STROKE +0128 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH TILDE +012A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH MACRON +012C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH BREVE +012E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Changes_When_Lowercased # L& LATIN CAPITAL LIGATURE IJ +0134 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH ACUTE +013B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH CARON +013F ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH STROKE +0143 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH ACUTE +0145 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH CARON +014A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ENG +014C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH MACRON +014E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH BREVE +0150 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Changes_When_Lowercased # L& LATIN CAPITAL LIGATURE OE +0154 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH CEDILLA +0158 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH CARON +015A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH ACUTE +015C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Changes_When_Lowercased # L& 
LATIN CAPITAL LETTER S WITH CARON +0162 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH CARON +0166 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH STROKE +0168 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH TILDE +016A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH MACRON +016C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH BREVE +016E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH CARON +0181..0182 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK +019C..019D ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; Changes_When_Lowercased # 
L& [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN +01A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OI +01A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Changes_When_Lowercased # L& [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ESH +01AC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TONE FIVE +01C4..01C5 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7..01C8 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER LJ..LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA..01CB ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER NJ..LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CARON +01CF ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH CARON +01D1 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CARON +01D3 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH CARON +01D5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 
+01E0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH CARON +01E8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH CARON +01EA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER EZH WITH CARON +01F1..01F2 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DOUBLE 
GRAVE +0216 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER YOGH +021E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH CARON +0220 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OU +0224 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER J WITH STROKE +024A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH STROKE +024E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH STROKE +0370 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER HETA +0372 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER ARCHAIC SAMPI 
+0376 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER YOT +0386 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Changes_When_Lowercased # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Changes_When_Lowercased # L& [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Changes_When_Lowercased # L& [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Changes_When_Lowercased # L& [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03CF ; Changes_When_Lowercased # L& GREEK CAPITAL KAI SYMBOL +03D8 ; Changes_When_Lowercased # L& GREEK LETTER ARCHAIC KOPPA +03DA ; Changes_When_Lowercased # L& GREEK LETTER STIGMA +03DC ; Changes_When_Lowercased # L& GREEK LETTER DIGAMMA +03DE ; Changes_When_Lowercased # L& GREEK LETTER KOPPA +03E0 ; Changes_When_Lowercased # L& GREEK LETTER SAMPI +03E2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SHEI +03E4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER FEI +03E6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KHEI +03E8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER HORI +03EA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER GANGIA +03EC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SHIMA +03EE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DEI +03F4 ; Changes_When_Lowercased # L& GREEK CAPITAL THETA SYMBOL +03F7 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER SHO +03F9..03FA ; Changes_When_Lowercased # L& [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Changes_When_Lowercased # L& [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER OMEGA +0462 ; 
Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YAT +0464 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BIG YUS +046C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KSI +0470 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER PSI +0472 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER FITA +0474 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER UK +047A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER OT +0480 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOPPA +048A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH STROKE 
+04A0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA +04BC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Changes_When_Lowercased # L& [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL 
LIGATURE A IE +04D6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SCHWA +04DA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BARRED O +04EA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI LJE 
+050A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER LHA +0516 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER RHA +0518 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YAE +051A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER QA +051C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER WE +051E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZZHE +052C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DCHE +052E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Changes_When_Lowercased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +10A0..10C5 ; Changes_When_Lowercased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN +13A0..13F5 ; Changes_When_Lowercased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TJE +1C90..1CBA ; Changes_When_Lowercased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; 
Changes_When_Lowercased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH MACRON +1E22 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; 
Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH ACUTE +1E56 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH DOT BELOW 
+1E64 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER 
SHARP S +1EA0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; 
Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ALPHA WITH 
PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Changes_When_Lowercased # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Changes_When_Lowercased # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68..1F6F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F88..1F8F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F98..1F9F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA8..1FAF ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB8..1FBC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER ALPHA WITH VRACHY..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FC8..1FCC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER EPSILON WITH VARIA..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD8..1FDB ; Changes_When_Lowercased # L& [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER 
IOTA WITH OXIA +1FE8..1FEC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF8..1FFC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER OMICRON WITH VARIA..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126 ; Changes_When_Lowercased # L& OHM SIGN +212A..212B ; Changes_When_Lowercased # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Changes_When_Lowercased # L& TURNED CAPITAL F +2160..216F ; Changes_When_Lowercased # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +2183 ; Changes_When_Lowercased # L& ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24CF ; Changes_When_Lowercased # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +2C00..2C2F ; Changes_When_Lowercased # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DESCENDER +2C69 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH DESCENDER +2C6B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER VIDA +2C84 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER GAMMA +2C86 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DALDA +2C88 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER EIE +2C8A ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SOU +2C8C ; Changes_When_Lowercased # L& COPTIC CAPITAL 
LETTER ZATA +2C8E ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER HATE +2C90 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER THETHE +2C92 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER IAUDA +2C94 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KAPA +2C96 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER LAULA +2C98 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER MI +2C9A ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER NI +2C9C ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KSI +2C9E ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER O +2CA0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER PI +2CA2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER RO +2CA4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SIMA +2CA6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER TAU +2CA8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER UA +2CAA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER FI +2CAC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KHI +2CAE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER PSI +2CB0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OOU +2CB2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SAMPI +2CC2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Changes_When_Lowercased # L& COPTIC 
CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZELO +A644 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTA +A648 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DJERV +A64A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL 
LETTER REVERSED YU +A656 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED A +A658 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YN +A660 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SOFT EL +A666 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DWE +A682 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZWE +A684 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZHWE +A686 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CCHE +A688 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZZE +A68A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TWE +A68E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TSWE +A690 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TSSE +A692 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TCHE +A694 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HWE +A696 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHWE +A698 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER EGYPTOLOGICAL AIN 
+A726 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER HENG +A728 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TZ +A72A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TRESILLO +A72C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CUATRILLO +A72E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AA +A734 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AO +A736 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AU +A738 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AV +A73A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AY +A73E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH STROKE +A742 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BROKEN L +A748 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH LOOP +A74E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OO +A750 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R ROTUNDA +A75C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Changes_When_Lowercased # L& 
LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VY +A762 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VEND +A76A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ET +A76C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER IS +A76E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CON +A779 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR D +A77B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED L +A782 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR R +A784 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR S +A786 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR T +A78B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SALTILLO +A78D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED H +A790 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH BAR +A796 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER F WITH STROKE +A79A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; 
Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Changes_When_Lowercased # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Changes_When_Lowercased # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OMEGA +A7B8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL A +A7BC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED HALF H +FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Changes_When_Lowercased # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Changes_When_Lowercased # L& 
[11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Changes_When_Lowercased # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Changes_When_Lowercased # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Changes_When_Lowercased # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Changes_When_Lowercased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Changes_When_Lowercased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA + +# Total code points: 1460 + +# ================================================ + +# Derived Property: Changes_When_Uppercased (CWU) +# Characters whose normalized forms are not stable under a toUppercase mapping. +# For more information, see D140 in Section 3.13, "Default Case Algorithms". 
+# Changes_When_Uppercased(X) is true when toUppercase(toNFD(X)) != toNFD(X) + +0061..007A ; Changes_When_Uppercased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Changes_When_Uppercased # L& MICRO SIGN +00DF..00F6 ; Changes_When_Uppercased # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Changes_When_Uppercased # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH MACRON +0103 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE +0105 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH OGONEK +0107 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH ACUTE +0109 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH CARON +010F ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH CARON +0111 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH STROKE +0113 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH MACRON +0115 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH BREVE +0117 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH OGONEK +011B ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CARON +011D ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH BREVE +0121 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH CEDILLA +0125 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH STROKE +0129 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH TILDE +012B ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH 
MACRON +012D ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH BREVE +012F ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH OGONEK +0131 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOTLESS I +0133 ; Changes_When_Uppercased # L& LATIN SMALL LIGATURE IJ +0135 ; Changes_When_Uppercased # L& LATIN SMALL LETTER J WITH CIRCUMFLEX +0137 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH CEDILLA +013A ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH ACUTE +013C ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH CEDILLA +013E ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH CARON +0140 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH STROKE +0144 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH ACUTE +0146 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; Changes_When_Uppercased # L& LATIN SMALL LETTER ENG +014D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH MACRON +014F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH BREVE +0151 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Changes_When_Uppercased # L& LATIN SMALL LIGATURE OE +0155 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH ACUTE +0157 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH CEDILLA +0159 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH CARON +015B ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH ACUTE +015D ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CEDILLA +0161 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CARON +0163 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH CEDILLA +0165 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH 
CARON +0167 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH STROKE +0169 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH TILDE +016B ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH MACRON +016D ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH BREVE +016F ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH OGONEK +0175 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH ACUTE +017C ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Changes_When_Uppercased # L& [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH TOPBAR +0185 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TONE SIX +0188 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH HOOK +018C ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH TOPBAR +0192 ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH HOOK +0195 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HV +0199..019B ; Changes_When_Uppercased # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE +019E ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG +01A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN +01A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OI +01A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH HOOK +01A8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TONE TWO +01AD ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH HOOK +01B0 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN +01B4 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH HOOK +01B6 ; Changes_When_Uppercased # 
L& LATIN SMALL LETTER Z WITH STROKE +01B9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EZH REVERSED +01BD ; Changes_When_Uppercased # L& LATIN SMALL LETTER TONE FIVE +01BF ; Changes_When_Uppercased # L& LATIN LETTER WYNN +01C5..01C6 ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON..LATIN SMALL LETTER DZ WITH CARON +01C8..01C9 ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER L WITH SMALL LETTER J..LATIN SMALL LETTER LJ +01CB..01CC ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER N WITH SMALL LETTER J..LATIN SMALL LETTER NJ +01CE ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CARON +01D0 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH CARON +01D2 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CARON +01D4 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH CARON +01D6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DC..01DD ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AE WITH MACRON +01E5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH STROKE +01E7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH CARON +01E9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH CARON +01EB ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH OGONEK +01ED ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F0 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON +01F2..01F3 ; Changes_When_Uppercased # L& [2] LATIN 
CAPITAL LETTER D WITH SMALL LETTER Z..LATIN SMALL LETTER DZ +01F5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH ACUTE +01F9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH GRAVE +01FB ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Changes_When_Uppercased # L& LATIN SMALL LETTER AE WITH ACUTE +01FF ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH INVERTED BREVE +0215 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Changes_When_Uppercased # L& LATIN SMALL LETTER YOGH +021F ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH CARON +0223 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OU +0225 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH HOOK +0227 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CEDILLA +022B ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; 
Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH MACRON +023C ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH STROKE +023F..0240 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL STOP +0247 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH STROKE +0249 ; Changes_When_Uppercased # L& LATIN SMALL LETTER J WITH STROKE +024B ; Changes_When_Uppercased # L& LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH STROKE +024F..0254 ; Changes_When_Uppercased # L& [6] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER OPEN O +0256..0257 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER D WITH TAIL..LATIN SMALL LETTER D WITH HOOK +0259 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SCHWA +025B..025C ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E +0260..0261 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G +0263..0266 ; Changes_When_Uppercased # L& [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK +0268..026C ; Changes_When_Uppercased # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT +026F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED M +0271..0272 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK +0275 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED O +027D ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH TAIL +0280 ; Changes_When_Uppercased # L& LATIN LETTER SMALL CAPITAL R +0282..0283 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER 
S WITH HOOK..LATIN SMALL LETTER ESH +0287..028C ; Changes_When_Uppercased # L& [6] LATIN SMALL LETTER TURNED T..LATIN SMALL LETTER TURNED V +0292 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EZH +029D..029E ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER TURNED K +0345 ; Changes_When_Uppercased # Mn COMBINING GREEK YPOGEGRAMMENI +0371 ; Changes_When_Uppercased # L& GREEK SMALL LETTER HETA +0373 ; Changes_When_Uppercased # L& GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Changes_When_Uppercased # L& GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Changes_When_Uppercased # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Changes_When_Uppercased # L& GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Changes_When_Uppercased # L& [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Changes_When_Uppercased # L& [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Changes_When_Uppercased # L& [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Changes_When_Uppercased # L& GREEK SMALL LETTER ARCHAIC KOPPA +03DB ; Changes_When_Uppercased # L& GREEK SMALL LETTER STIGMA +03DD ; Changes_When_Uppercased # L& GREEK SMALL LETTER DIGAMMA +03DF ; Changes_When_Uppercased # L& GREEK SMALL LETTER KOPPA +03E1 ; Changes_When_Uppercased # L& GREEK SMALL LETTER SAMPI +03E3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SHEI +03E5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER FEI +03E7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KHEI +03E9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER HORI +03EB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER GANGIA +03ED ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Changes_When_Uppercased # L& [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Changes_When_Uppercased # L& GREEK LUNATE EPSILON SYMBOL +03F8 ; Changes_When_Uppercased # L& GREEK SMALL 
LETTER SHO +03FB ; Changes_When_Uppercased # L& GREEK SMALL LETTER SAN +0430..045F ; Changes_When_Uppercased # L& [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER OMEGA +0463 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YAT +0465 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BIG YUS +046D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KSI +0471 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER PSI +0473 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER FITA +0475 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IZHITSA +0477 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER UK +047B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ROUND OMEGA +047D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER OT +0481 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOPPA +048B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Changes_When_Uppercased # L& CYRILLIC 
SMALL LETTER KA WITH DESCENDER +049D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +04BB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHHA +04BD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CE..04CF ; Changes_When_Uppercased # L& [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Changes_When_Uppercased # L& CYRILLIC SMALL 
LETTER A WITH BREVE +04D3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LIGATURE A IE +04D7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SCHWA +04DB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BARRED O +04EB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI DE +0503 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI DJE +0505 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Changes_When_Uppercased # L& 
CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI LJE +050B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI NJE +050D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI SJE +050F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI TJE +0511 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER LHA +0517 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER RHA +0519 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YAE +051B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER QA +051D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER WE +051F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ALEUT KA +0521 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0525 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZZHE +052D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DCHE +052F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH DESCENDER +0561..0587 ; Changes_When_Uppercased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +10D0..10FA ; Changes_When_Uppercased # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Changes_When_Uppercased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13F8..13FD ; Changes_When_Uppercased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Changes_When_Uppercased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TJE +1D79 
; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR G +1D7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE +1D8E ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK +1E01 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH CEDILLA +1E13 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CEDILLA AND BREVE +1E1F ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH MACRON +1E23 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH 
DIAERESIS AND ACUTE +1E31 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH ACUTE +1E33 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Changes_When_Uppercased # L& LATIN SMALL LETTER M WITH ACUTE +1E41 ; Changes_When_Uppercased # L& LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Changes_When_Uppercased # L& LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH ACUTE +1E57 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; 
Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH LINE BELOW +1E71 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER V WITH TILDE +1E7F ; Changes_When_Uppercased # L& LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH GRAVE +1E83 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH ACUTE +1E85 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH DOT ABOVE +1E89 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Changes_When_Uppercased # L& LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Changes_When_Uppercased # L& LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH DOT BELOW +1E95..1E9B ; Changes_When_Uppercased # L& [7] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOT 
BELOW +1EA3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH TILDE +1EBF ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH 
CIRCUMFLEX AND GRAVE +1ED5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH DOT BELOW +1EF7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH TILDE +1EFB ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Changes_When_Uppercased # L& [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Changes_When_Uppercased # L& [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER ETA WITH 
PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Changes_When_Uppercased # L& [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Changes_When_Uppercased # L& [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Changes_When_Uppercased # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBC ; Changes_When_Uppercased # L& GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Changes_When_Uppercased # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Uppercased # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FCC ; Changes_When_Uppercased # L& GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Changes_When_Uppercased # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; 
Changes_When_Uppercased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FFC ; Changes_When_Uppercased # L& GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +214E ; Changes_When_Uppercased # L& TURNED SMALL F +2170..217F ; Changes_When_Uppercased # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2184 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED C +24D0..24E9 ; Changes_When_Uppercased # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C30..2C5F ; Changes_When_Uppercased # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DESCENDER +2C6A ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH DESCENDER +2C73 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH HOOK +2C76 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HALF H +2C81 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER ALFA +2C83 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER VIDA +2C85 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER GAMMA +2C87 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DALDA +2C89 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER EIE +2C8B ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SOU +2C8D ; Changes_When_Uppercased # L& COPTIC SMALL LETTER ZATA +2C8F ; Changes_When_Uppercased # L& COPTIC SMALL LETTER HATE +2C91 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER THETHE +2C93 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER IAUDA +2C95 
; Changes_When_Uppercased # L& COPTIC SMALL LETTER KAPA +2C97 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER LAULA +2C99 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER MI +2C9B ; Changes_When_Uppercased # L& COPTIC SMALL LETTER NI +2C9D ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KSI +2C9F ; Changes_When_Uppercased # L& COPTIC SMALL LETTER O +2CA1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER PI +2CA3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER RO +2CA5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SIMA +2CA7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER TAU +2CA9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER UA +2CAB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER FI +2CAD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KHI +2CAF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER PSI +2CB1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OOU +2CB3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SAMPI +2CC3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; 
Changes_When_Uppercased # L& COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN WAU +2CEC ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Changes_When_Uppercased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER AEN +A641 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZEMLYA +A643 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZELO +A645 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTA +A649 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DJERV +A64B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YERU WITH BACK YER +A653 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED YU +A657 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER 
IOTIFIED A +A659 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YN +A661 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SOFT DE +A665 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SOFT EL +A667 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SOFT EM +A669 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DWE +A683 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZWE +A685 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHWE +A687 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CCHE +A689 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZZE +A68B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TWE +A68F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TSWE +A691 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TSSE +A693 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TCHE +A695 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HWE +A697 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHWE +A699 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DOUBLE O +A69B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CROSSED O +A723 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HENG +A729 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TZ +A72B ; 
Changes_When_Uppercased # L& LATIN SMALL LETTER TRESILLO +A72D ; Changes_When_Uppercased # L& LATIN SMALL LETTER CUATRILLO +A72F ; Changes_When_Uppercased # L& LATIN SMALL LETTER CUATRILLO WITH COMMA +A733 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AA +A735 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AO +A737 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AU +A739 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AV +A73B ; Changes_When_Uppercased # L& LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Changes_When_Uppercased # L& LATIN SMALL LETTER AY +A73F ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH STROKE +A743 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH DIAGONAL STROKE +A745 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BROKEN L +A749 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH LOOP +A74F ; Changes_When_Uppercased # L& LATIN SMALL LETTER OO +A751 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH FLOURISH +A755 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Changes_When_Uppercased # L& LATIN SMALL LETTER R ROTUNDA +A75D ; Changes_When_Uppercased # L& LATIN SMALL LETTER RUM ROTUNDA +A75F ; Changes_When_Uppercased # L& LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Changes_When_Uppercased # L& LATIN SMALL LETTER VY +A763 ; Changes_When_Uppercased # L& LATIN SMALL LETTER VISIGOTHIC Z +A765 ; 
Changes_When_Uppercased # L& LATIN SMALL LETTER THORN WITH STROKE +A767 ; Changes_When_Uppercased # L& LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Changes_When_Uppercased # L& LATIN SMALL LETTER VEND +A76B ; Changes_When_Uppercased # L& LATIN SMALL LETTER ET +A76D ; Changes_When_Uppercased # L& LATIN SMALL LETTER IS +A76F ; Changes_When_Uppercased # L& LATIN SMALL LETTER CON +A77A ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR D +A77C ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR F +A77F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED INSULAR G +A781 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED L +A783 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR R +A785 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR S +A787 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR T +A78C ; Changes_When_Uppercased # L& LATIN SMALL LETTER SALTILLO +A791 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DESCENDER +A793..A794 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER C WITH PALATAL HOOK +A797 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH FLOURISH +A799 ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH STROKE +A79B ; Changes_When_Uppercased # L& LATIN SMALL LETTER VOLAPUK AE +A79D ; Changes_When_Uppercased # L& LATIN SMALL LETTER VOLAPUK OE +A79F ; Changes_When_Uppercased # L& LATIN SMALL LETTER VOLAPUK UE +A7A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE +A7A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7B5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BETA +A7B7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OMEGA +A7B9 ; 
Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH STROKE +A7BB ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL A +A7BD ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL I +A7BF ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OLD POLISH O +A7C3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER ANGLICANA W +A7C8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Changes_When_Uppercased # L& LATIN SMALL LETTER LAMBDA +A7F6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED HALF H +AB53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CHI +AB70..ABBF ; Changes_When_Uppercased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Uppercased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Uppercased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Changes_When_Uppercased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Changes_When_Uppercased # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Changes_When_Uppercased # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Changes_When_Uppercased # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Changes_When_Uppercased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; 
Changes_When_Uppercased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10CC0..10CF2 ; Changes_When_Uppercased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Changes_When_Uppercased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 1552 + +# ================================================ + +# Derived Property: Changes_When_Titlecased (CWT) +# Characters whose normalized forms are not stable under a toTitlecase mapping. +# For more information, see D141 in Section 3.13, "Default Case Algorithms". +# Changes_When_Titlecased(X) is true when toTitlecase(toNFD(X)) != toNFD(X) + +0061..007A ; Changes_When_Titlecased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Changes_When_Titlecased # L& MICRO SIGN +00DF..00F6 ; Changes_When_Titlecased # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Changes_When_Titlecased # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH MACRON +0103 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE +0105 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH OGONEK +0107 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH ACUTE +0109 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH CARON +010F ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH CARON +0111 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH STROKE +0113 ; 
Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH MACRON +0115 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH BREVE +0117 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH OGONEK +011B ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CARON +011D ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH BREVE +0121 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH CEDILLA +0125 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH STROKE +0129 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH TILDE +012B ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH MACRON +012D ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH BREVE +012F ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH OGONEK +0131 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOTLESS I +0133 ; Changes_When_Titlecased # L& LATIN SMALL LIGATURE IJ +0135 ; Changes_When_Titlecased # L& LATIN SMALL LETTER J WITH CIRCUMFLEX +0137 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH CEDILLA +013A ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH ACUTE +013C ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH CEDILLA +013E ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH CARON +0140 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH STROKE +0144 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH ACUTE +0146 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; Changes_When_Titlecased # L& LATIN SMALL LETTER 
ENG +014D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH MACRON +014F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH BREVE +0151 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Changes_When_Titlecased # L& LATIN SMALL LIGATURE OE +0155 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH ACUTE +0157 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH CEDILLA +0159 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH CARON +015B ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH ACUTE +015D ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CEDILLA +0161 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CARON +0163 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH CEDILLA +0165 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH CARON +0167 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH STROKE +0169 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH TILDE +016B ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH MACRON +016D ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH BREVE +016F ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH OGONEK +0175 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH ACUTE +017C ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH TOPBAR +0185 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TONE SIX +0188 ; Changes_When_Titlecased # L& LATIN SMALL 
LETTER C WITH HOOK +018C ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH TOPBAR +0192 ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH HOOK +0195 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HV +0199..019B ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE +019E ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG +01A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN +01A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OI +01A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH HOOK +01A8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TONE TWO +01AD ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH HOOK +01B0 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN +01B4 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH HOOK +01B6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH STROKE +01B9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EZH REVERSED +01BD ; Changes_When_Titlecased # L& LATIN SMALL LETTER TONE FIVE +01BF ; Changes_When_Titlecased # L& LATIN LETTER WYNN +01C4 ; Changes_When_Titlecased # L& LATIN CAPITAL LETTER DZ WITH CARON +01C6..01C7 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER DZ WITH CARON..LATIN CAPITAL LETTER LJ +01C9..01CA ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER LJ..LATIN CAPITAL LETTER NJ +01CC ; Changes_When_Titlecased # L& LATIN SMALL LETTER NJ +01CE ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CARON +01D0 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH CARON +01D2 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CARON +01D4 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH CARON +01D6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS AND CARON 
+01DC..01DD ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AE WITH MACRON +01E5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH STROKE +01E7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH CARON +01E9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH CARON +01EB ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH OGONEK +01ED ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F1 ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER EZH WITH CARON..LATIN CAPITAL LETTER DZ +01F3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DZ +01F5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH ACUTE +01F9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH GRAVE +01FB ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Changes_When_Titlecased # L& LATIN SMALL LETTER AE WITH ACUTE +01FF ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Changes_When_Titlecased # L& LATIN SMALL 
LETTER R WITH INVERTED BREVE +0215 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Changes_When_Titlecased # L& LATIN SMALL LETTER YOGH +021F ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH CARON +0223 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OU +0225 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH HOOK +0227 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CEDILLA +022B ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH MACRON +023C ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH STROKE +023F..0240 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL STOP +0247 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH STROKE +0249 ; Changes_When_Titlecased # L& LATIN SMALL LETTER J WITH STROKE +024B ; Changes_When_Titlecased # L& LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH STROKE +024F..0254 ; Changes_When_Titlecased # L& [6] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER OPEN O +0256..0257 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER D WITH TAIL..LATIN SMALL LETTER D WITH HOOK +0259 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SCHWA +025B..025C ; Changes_When_Titlecased # L& [2] 
LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E +0260..0261 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G +0263..0266 ; Changes_When_Titlecased # L& [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK +0268..026C ; Changes_When_Titlecased # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT +026F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED M +0271..0272 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK +0275 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED O +027D ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH TAIL +0280 ; Changes_When_Titlecased # L& LATIN LETTER SMALL CAPITAL R +0282..0283 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER S WITH HOOK..LATIN SMALL LETTER ESH +0287..028C ; Changes_When_Titlecased # L& [6] LATIN SMALL LETTER TURNED T..LATIN SMALL LETTER TURNED V +0292 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EZH +029D..029E ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER TURNED K +0345 ; Changes_When_Titlecased # Mn COMBINING GREEK YPOGEGRAMMENI +0371 ; Changes_When_Titlecased # L& GREEK SMALL LETTER HETA +0373 ; Changes_When_Titlecased # L& GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Changes_When_Titlecased # L& GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Changes_When_Titlecased # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Changes_When_Titlecased # L& GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Changes_When_Titlecased # L& [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Changes_When_Titlecased # L& [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Changes_When_Titlecased # L& [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Changes_When_Titlecased # L& GREEK SMALL LETTER ARCHAIC KOPPA 
+03DB ; Changes_When_Titlecased # L& GREEK SMALL LETTER STIGMA +03DD ; Changes_When_Titlecased # L& GREEK SMALL LETTER DIGAMMA +03DF ; Changes_When_Titlecased # L& GREEK SMALL LETTER KOPPA +03E1 ; Changes_When_Titlecased # L& GREEK SMALL LETTER SAMPI +03E3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SHEI +03E5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER FEI +03E7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KHEI +03E9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER HORI +03EB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER GANGIA +03ED ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Changes_When_Titlecased # L& [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Changes_When_Titlecased # L& GREEK LUNATE EPSILON SYMBOL +03F8 ; Changes_When_Titlecased # L& GREEK SMALL LETTER SHO +03FB ; Changes_When_Titlecased # L& GREEK SMALL LETTER SAN +0430..045F ; Changes_When_Titlecased # L& [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER OMEGA +0463 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YAT +0465 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BIG YUS +046D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KSI +0471 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER PSI +0473 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER FITA +0475 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IZHITSA +0477 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER UK +047B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ROUND OMEGA +047D ; 
Changes_When_Titlecased # L& CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER OT +0481 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOPPA +048B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH DESCENDER +049D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CHE WITH 
VERTICAL STROKE +04BB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHHA +04BD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CE..04CF ; Changes_When_Titlecased # L& [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER A WITH BREVE +04D3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LIGATURE A IE +04D7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SCHWA +04DB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BARRED O +04EB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; 
Changes_When_Titlecased # L& CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI DE +0503 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI DJE +0505 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI LJE +050B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI NJE +050D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI SJE +050F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI TJE +0511 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER LHA +0517 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER RHA +0519 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YAE +051B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER QA +051D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER WE +051F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ALEUT KA +0521 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0525 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 
; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZZHE +052D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DCHE +052F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH DESCENDER +0561..0587 ; Changes_When_Titlecased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +13F8..13FD ; Changes_When_Titlecased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Changes_When_Titlecased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TJE +1D79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR G +1D7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE +1D8E ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK +1E01 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH CEDILLA +1E13 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH 
CEDILLA AND BREVE +1E1F ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH MACRON +1E23 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE +1E31 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH ACUTE +1E33 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Changes_When_Titlecased # L& LATIN SMALL LETTER M WITH ACUTE +1E41 ; Changes_When_Titlecased # L& LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Changes_When_Titlecased # L& LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; Changes_When_Titlecased 
# L& LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH ACUTE +1E57 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH LINE BELOW +1E71 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER V WITH TILDE +1E7F ; Changes_When_Titlecased # L& LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH GRAVE +1E83 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH ACUTE +1E85 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Changes_When_Titlecased # L& LATIN SMALL LETTER 
W WITH DOT ABOVE +1E89 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Changes_When_Titlecased # L& LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Changes_When_Titlecased # L& LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH DOT BELOW +1E95..1E9B ; Changes_When_Titlecased # L& [7] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOT BELOW +1EA3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH TILDE +1EBF ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC3 ; Changes_When_Titlecased # L& LATIN SMALL 
LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Changes_When_Titlecased # L& LATIN SMALL 
LETTER Y WITH DOT BELOW +1EF7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH TILDE +1EFB ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Changes_When_Titlecased # L& [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Changes_When_Titlecased # L& [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Changes_When_Titlecased # L& [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Changes_When_Titlecased # L& [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1F87 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F90..1F97 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA0..1FA7 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FB0..1FB4 ; Changes_When_Titlecased # L& [5] GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH 
OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBE ; Changes_When_Titlecased # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Titlecased # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FD0..1FD3 ; Changes_When_Titlecased # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; Changes_When_Titlecased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +214E ; Changes_When_Titlecased # L& TURNED SMALL F +2170..217F ; Changes_When_Titlecased # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2184 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED C +24D0..24E9 ; Changes_When_Titlecased # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C30..2C5F ; Changes_When_Titlecased # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DESCENDER +2C6A ; 
Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH DESCENDER +2C73 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH HOOK +2C76 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HALF H +2C81 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER ALFA +2C83 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER VIDA +2C85 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER GAMMA +2C87 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DALDA +2C89 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER EIE +2C8B ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SOU +2C8D ; Changes_When_Titlecased # L& COPTIC SMALL LETTER ZATA +2C8F ; Changes_When_Titlecased # L& COPTIC SMALL LETTER HATE +2C91 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER THETHE +2C93 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER IAUDA +2C95 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KAPA +2C97 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER LAULA +2C99 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER MI +2C9B ; Changes_When_Titlecased # L& COPTIC SMALL LETTER NI +2C9D ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KSI +2C9F ; Changes_When_Titlecased # L& COPTIC SMALL LETTER O +2CA1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER PI +2CA3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER RO +2CA5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SIMA +2CA7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER TAU +2CA9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER UA +2CAB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER FI +2CAD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KHI +2CAF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER PSI +2CB1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OOU +2CB3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Changes_When_Titlecased # L& COPTIC SMALL 
LETTER CRYPTOGRAMMIC EIE +2CB9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SAMPI +2CC3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN WAU +2CEC ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Changes_When_Titlecased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER YN +2D2D ; 
Changes_When_Titlecased # L& GEORGIAN SMALL LETTER AEN +A641 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZEMLYA +A643 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZELO +A645 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTA +A649 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DJERV +A64B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YERU WITH BACK YER +A653 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED YU +A657 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED A +A659 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YN +A661 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SOFT DE +A665 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SOFT EL +A667 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SOFT EM +A669 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DWE +A683 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZWE +A685 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHWE +A687 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CCHE +A689 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZZE 
+A68B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TWE +A68F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TSWE +A691 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TSSE +A693 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TCHE +A695 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HWE +A697 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHWE +A699 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DOUBLE O +A69B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CROSSED O +A723 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HENG +A729 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TZ +A72B ; Changes_When_Titlecased # L& LATIN SMALL LETTER TRESILLO +A72D ; Changes_When_Titlecased # L& LATIN SMALL LETTER CUATRILLO +A72F ; Changes_When_Titlecased # L& LATIN SMALL LETTER CUATRILLO WITH COMMA +A733 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AA +A735 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AO +A737 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AU +A739 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AV +A73B ; Changes_When_Titlecased # L& LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Changes_When_Titlecased # L& LATIN SMALL LETTER AY +A73F ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH STROKE +A743 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH DIAGONAL STROKE +A745 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BROKEN L +A749 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; 
Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH LOOP +A74F ; Changes_When_Titlecased # L& LATIN SMALL LETTER OO +A751 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH FLOURISH +A755 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Changes_When_Titlecased # L& LATIN SMALL LETTER R ROTUNDA +A75D ; Changes_When_Titlecased # L& LATIN SMALL LETTER RUM ROTUNDA +A75F ; Changes_When_Titlecased # L& LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Changes_When_Titlecased # L& LATIN SMALL LETTER VY +A763 ; Changes_When_Titlecased # L& LATIN SMALL LETTER VISIGOTHIC Z +A765 ; Changes_When_Titlecased # L& LATIN SMALL LETTER THORN WITH STROKE +A767 ; Changes_When_Titlecased # L& LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Changes_When_Titlecased # L& LATIN SMALL LETTER VEND +A76B ; Changes_When_Titlecased # L& LATIN SMALL LETTER ET +A76D ; Changes_When_Titlecased # L& LATIN SMALL LETTER IS +A76F ; Changes_When_Titlecased # L& LATIN SMALL LETTER CON +A77A ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR D +A77C ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR F +A77F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED INSULAR G +A781 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED L +A783 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR R +A785 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR S +A787 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR T +A78C ; Changes_When_Titlecased # L& LATIN SMALL LETTER SALTILLO +A791 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DESCENDER +A793..A794 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER C WITH PALATAL HOOK +A797 ; 
Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH FLOURISH +A799 ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH STROKE +A79B ; Changes_When_Titlecased # L& LATIN SMALL LETTER VOLAPUK AE +A79D ; Changes_When_Titlecased # L& LATIN SMALL LETTER VOLAPUK OE +A79F ; Changes_When_Titlecased # L& LATIN SMALL LETTER VOLAPUK UE +A7A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE +A7A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7B5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BETA +A7B7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OMEGA +A7B9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH STROKE +A7BB ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL A +A7BD ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL I +A7BF ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OLD POLISH O +A7C3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER ANGLICANA W +A7C8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S +A7DB ; Changes_When_Titlecased # L& LATIN SMALL LETTER LAMBDA +A7F6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED HALF H +AB53 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CHI +AB70..ABBF ; Changes_When_Titlecased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER 
YA +FB00..FB06 ; Changes_When_Titlecased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Titlecased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Changes_When_Titlecased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Changes_When_Titlecased # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Changes_When_Titlecased # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Changes_When_Titlecased # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Changes_When_Titlecased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Changes_When_Titlecased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10CC0..10CF2 ; Changes_When_Titlecased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Changes_When_Titlecased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 1479 + +# ================================================ + +# Derived Property: Changes_When_Casefolded (CWCF) +# Characters whose normalized forms are not stable under case folding. +# For more information, see D142 in Section 3.13, "Default Case Algorithms". 
+# Changes_When_Casefolded(X) is true when toCasefold(toNFD(X)) != toNFD(X) + +0041..005A ; Changes_When_Casefolded # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00B5 ; Changes_When_Casefolded # L& MICRO SIGN +00C0..00D6 ; Changes_When_Casefolded # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DF ; Changes_When_Casefolded # L& [8] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER SHARP S +0100 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH MACRON +0102 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE +0104 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH CARON +010E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH CARON +0110 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH STROKE +0112 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH MACRON +0114 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH BREVE +0116 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH OGONEK +011A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CARON +011C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH BREVE +0120 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH STROKE +0128 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH TILDE +012A ; 
Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH MACRON +012C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH BREVE +012E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Changes_When_Casefolded # L& LATIN CAPITAL LIGATURE IJ +0134 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH ACUTE +013B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH CARON +013F ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH STROKE +0143 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH ACUTE +0145 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH CARON +0149..014A ; Changes_When_Casefolded # L& [2] LATIN SMALL LETTER N PRECEDED BY APOSTROPHE..LATIN CAPITAL LETTER ENG +014C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH MACRON +014E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH BREVE +0150 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Changes_When_Casefolded # L& LATIN CAPITAL LIGATURE OE +0154 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH CEDILLA +0158 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH CARON +015A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH ACUTE +015C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CARON +0162 ; 
Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH CARON +0166 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH STROKE +0168 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH TILDE +016A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH MACRON +016C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH BREVE +016E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH CARON +017F ; Changes_When_Casefolded # L& LATIN SMALL LETTER LONG S +0181..0182 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK +019C..019D ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; 
Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN +01A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OI +01A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Changes_When_Casefolded # L& [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ESH +01AC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TONE FIVE +01C4..01C5 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7..01C8 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER LJ..LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA..01CB ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER NJ..LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CARON +01CF ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH CARON +01D1 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CARON +01D3 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH CARON +01D5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A 
WITH DIAERESIS AND MACRON +01E0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH CARON +01E8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH CARON +01EA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER EZH WITH CARON +01F1..01F2 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Changes_When_Casefolded # L& LATIN 
CAPITAL LETTER U WITH DOUBLE GRAVE +0216 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER YOGH +021E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH CARON +0220 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OU +0224 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER J WITH STROKE +024A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH STROKE +024E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH STROKE +0345 ; Changes_When_Casefolded # Mn COMBINING GREEK YPOGEGRAMMENI +0370 ; Changes_When_Casefolded # L& 
GREEK CAPITAL LETTER HETA +0372 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER ARCHAIC SAMPI +0376 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER YOT +0386 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Changes_When_Casefolded # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Changes_When_Casefolded # L& [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Changes_When_Casefolded # L& [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Changes_When_Casefolded # L& [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03C2 ; Changes_When_Casefolded # L& GREEK SMALL LETTER FINAL SIGMA +03CF..03D1 ; Changes_When_Casefolded # L& [3] GREEK CAPITAL KAI SYMBOL..GREEK THETA SYMBOL +03D5..03D6 ; Changes_When_Casefolded # L& [2] GREEK PHI SYMBOL..GREEK PI SYMBOL +03D8 ; Changes_When_Casefolded # L& GREEK LETTER ARCHAIC KOPPA +03DA ; Changes_When_Casefolded # L& GREEK LETTER STIGMA +03DC ; Changes_When_Casefolded # L& GREEK LETTER DIGAMMA +03DE ; Changes_When_Casefolded # L& GREEK LETTER KOPPA +03E0 ; Changes_When_Casefolded # L& GREEK LETTER SAMPI +03E2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SHEI +03E4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER FEI +03E6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KHEI +03E8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER HORI +03EA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER GANGIA +03EC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SHIMA +03EE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DEI +03F0..03F1 ; Changes_When_Casefolded # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Changes_When_Casefolded # L& [2] GREEK CAPITAL THETA 
SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F7 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER SHO +03F9..03FA ; Changes_When_Casefolded # L& [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Changes_When_Casefolded # L& [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER OMEGA +0462 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YAT +0464 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BIG YUS +046C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KSI +0470 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER PSI +0472 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER FITA +0474 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER UK +047A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER OT +0480 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOPPA +048A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK 
+0496 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHHA +04BC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Changes_When_Casefolded # L& [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; 
Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LIGATURE A IE +04D6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SCHWA +04DA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BARRED O +04EA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE 
; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI LJE +050A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER LHA +0516 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER RHA +0518 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YAE +051A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER QA +051C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER WE +051E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZZHE +052C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DCHE +052E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Changes_When_Casefolded # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0587 ; Changes_When_Casefolded # L& ARMENIAN SMALL LIGATURE ECH YIWN +10A0..10C5 ; Changes_When_Casefolded # 
L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN +13F8..13FD ; Changes_When_Casefolded # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C89 ; Changes_When_Casefolded # L& [10] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC CAPITAL LETTER TJE +1C90..1CBA ; Changes_When_Casefolded # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Changes_When_Casefolded # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH MACRON +1E22 ; 
Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH ACUTE 
+1E56 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH DOT BELOW +1E64 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Changes_When_Casefolded # L& LATIN CAPITAL 
LETTER W WITH DOT BELOW +1E8A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9A..1E9B ; Changes_When_Casefolded # L& [2] LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SHARP S +1EA0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; 
Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; 
Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Changes_When_Casefolded # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Changes_When_Casefolded # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68..1F6F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80..1FAF ; Changes_When_Casefolded # L& [48] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2..1FB4 ; Changes_When_Casefolded # L& [3] GREEK SMALL LETTER 
ALPHA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB7..1FBC ; Changes_When_Casefolded # L& [6] GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Casefolded # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC7..1FCC ; Changes_When_Casefolded # L& [6] GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD8..1FDB ; Changes_When_Casefolded # L& [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE8..1FEC ; Changes_When_Casefolded # L& [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Changes_When_Casefolded # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF7..1FFC ; Changes_When_Casefolded # L& [6] GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126 ; Changes_When_Casefolded # L& OHM SIGN +212A..212B ; Changes_When_Casefolded # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Changes_When_Casefolded # L& TURNED CAPITAL F +2160..216F ; Changes_When_Casefolded # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +2183 ; Changes_When_Casefolded # L& ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24CF ; Changes_When_Casefolded # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +2C00..2C2F ; Changes_When_Casefolded # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DESCENDER +2C69 ; Changes_When_Casefolded # L& LATIN CAPITAL 
LETTER K WITH DESCENDER +2C6B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER VIDA +2C84 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER GAMMA +2C86 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DALDA +2C88 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER EIE +2C8A ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SOU +2C8C ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER ZATA +2C8E ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER HATE +2C90 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER THETHE +2C92 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER IAUDA +2C94 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KAPA +2C96 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER LAULA +2C98 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER MI +2C9A ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER NI +2C9C ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KSI +2C9E ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER O +2CA0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER PI +2CA2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER RO +2CA4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SIMA +2CA6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER TAU +2CA8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER UA +2CAA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER FI +2CAC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KHI +2CAE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER PSI +2CB0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OOU +2CB2 ; 
Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SAMPI +2CC2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA 
+2CF2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZELO +A644 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTA +A648 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DJERV +A64A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED YU +A656 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED A +A658 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YN +A660 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SOFT EL +A666 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DWE +A682 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZWE +A684 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZHWE +A686 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL 
LETTER CCHE +A688 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZZE +A68A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TWE +A68E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TSWE +A690 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TSSE +A692 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TCHE +A694 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HWE +A696 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHWE +A698 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER HENG +A728 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TZ +A72A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TRESILLO +A72C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CUATRILLO +A72E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AA +A734 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AO +A736 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AU +A738 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AV +A73A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AY +A73E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH STROKE +A742 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BROKEN L +A748 ; Changes_When_Casefolded # L& 
LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH LOOP +A74E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OO +A750 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R ROTUNDA +A75C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VY +A762 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VEND +A76A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ET +A76C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER IS +A76E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CON +A779 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR D +A77B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED L +A782 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR R +A784 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR S +A786 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR T +A78B ; Changes_When_Casefolded # L& 
LATIN CAPITAL LETTER SALTILLO +A78D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED H +A790 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR +A796 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER F WITH STROKE +A79A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Changes_When_Casefolded # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Changes_When_Casefolded # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OMEGA +A7B8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL A +A7BC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER RAMS 
HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S +A7DA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA +A7DC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H +AB70..ABBF ; Changes_When_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Changes_When_Casefolded # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Changes_When_Casefolded # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Changes_When_Casefolded # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Changes_When_Casefolded # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Changes_When_Casefolded # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Changes_When_Casefolded # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Changes_When_Casefolded # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Changes_When_Casefolded # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1E900..1E921 ; 
Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA + +# Total code points: 1533 + +# ================================================ + +# Derived Property: Changes_When_Casemapped (CWCM) +# Characters whose normalized forms are not stable under case mapping. +# For more information, see D143 in Section 3.13, "Default Case Algorithms". +# Changes_When_Casemapped(X) is true when CWL(X), or CWT(X), or CWU(X) + +0041..005A ; Changes_When_Casemapped # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Changes_When_Casemapped # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Changes_When_Casemapped # L& MICRO SIGN +00C0..00D6 ; Changes_When_Casemapped # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Changes_When_Casemapped # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..0137 ; Changes_When_Casemapped # L& [64] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER K WITH CEDILLA +0139..018C ; Changes_When_Casemapped # L& [84] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER D WITH TOPBAR +018E..01A9 ; Changes_When_Casemapped # L& [28] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER ESH +01AC..01B9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER T WITH HOOK..LATIN SMALL LETTER EZH REVERSED +01BC..01BD ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER TONE FIVE..LATIN SMALL LETTER TONE FIVE +01BF ; Changes_When_Casemapped # L& LATIN LETTER WYNN +01C4..0220 ; Changes_When_Casemapped # L& [93] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222..0233 ; Changes_When_Casemapped # L& [18] LATIN CAPITAL LETTER OU..LATIN SMALL LETTER Y WITH MACRON +023A..0254 ; Changes_When_Casemapped # L& [27] LATIN CAPITAL LETTER A WITH STROKE..LATIN SMALL LETTER OPEN O +0256..0257 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER D WITH TAIL..LATIN SMALL LETTER D WITH HOOK +0259 ; 
Changes_When_Casemapped # L& LATIN SMALL LETTER SCHWA +025B..025C ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E +0260..0261 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G +0263..0266 ; Changes_When_Casemapped # L& [4] LATIN SMALL LETTER GAMMA..LATIN SMALL LETTER H WITH HOOK +0268..026C ; Changes_When_Casemapped # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT +026F ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED M +0271..0272 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK +0275 ; Changes_When_Casemapped # L& LATIN SMALL LETTER BARRED O +027D ; Changes_When_Casemapped # L& LATIN SMALL LETTER R WITH TAIL +0280 ; Changes_When_Casemapped # L& LATIN LETTER SMALL CAPITAL R +0282..0283 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER S WITH HOOK..LATIN SMALL LETTER ESH +0287..028C ; Changes_When_Casemapped # L& [6] LATIN SMALL LETTER TURNED T..LATIN SMALL LETTER TURNED V +0292 ; Changes_When_Casemapped # L& LATIN SMALL LETTER EZH +029D..029E ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER TURNED K +0345 ; Changes_When_Casemapped # Mn COMBINING GREEK YPOGEGRAMMENI +0370..0373 ; Changes_When_Casemapped # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; Changes_When_Casemapped # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Changes_When_Casemapped # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER YOT +0386 ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Changes_When_Casemapped # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Changes_When_Casemapped # L& GREEK CAPITAL 
LETTER OMICRON WITH TONOS +038E..03A1 ; Changes_When_Casemapped # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03D1 ; Changes_When_Casemapped # L& [47] GREEK CAPITAL LETTER SIGMA..GREEK THETA SYMBOL +03D5..03F5 ; Changes_When_Casemapped # L& [33] GREEK PHI SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F7..03FB ; Changes_When_Casemapped # L& [5] GREEK CAPITAL LETTER SHO..GREEK SMALL LETTER SAN +03FD..0481 ; Changes_When_Casemapped # L& [133] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC SMALL LETTER KOPPA +048A..052F ; Changes_When_Casemapped # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Changes_When_Casemapped # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0561..0587 ; Changes_When_Casemapped # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +10A0..10C5 ; Changes_When_Casemapped # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Changes_When_Casemapped # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Changes_When_Casemapped # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13A0..13F5 ; Changes_When_Casemapped # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Changes_When_Casemapped # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C8A ; Changes_When_Casemapped # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; Changes_When_Casemapped # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Changes_When_Casemapped # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G +1D7D ; Changes_When_Casemapped # L& LATIN SMALL LETTER P WITH STROKE +1D8E ; 
Changes_When_Casemapped # L& LATIN SMALL LETTER Z WITH PALATAL HOOK +1E00..1E9B ; Changes_When_Casemapped # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E ; Changes_When_Casemapped # L& LATIN CAPITAL LETTER SHARP S +1EA0..1F15 ; Changes_When_Casemapped # L& [118] LATIN CAPITAL LETTER A WITH DOT BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Changes_When_Casemapped # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Changes_When_Casemapped # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Changes_When_Casemapped # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Changes_When_Casemapped # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Changes_When_Casemapped # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Changes_When_Casemapped # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Changes_When_Casemapped # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Changes_When_Casemapped # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Casemapped # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Changes_When_Casemapped # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; 
Changes_When_Casemapped # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Changes_When_Casemapped # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; Changes_When_Casemapped # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Changes_When_Casemapped # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Changes_When_Casemapped # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126 ; Changes_When_Casemapped # L& OHM SIGN +212A..212B ; Changes_When_Casemapped # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Changes_When_Casemapped # L& TURNED CAPITAL F +214E ; Changes_When_Casemapped # L& TURNED SMALL F +2160..217F ; Changes_When_Casemapped # Nl [32] ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2183..2184 ; Changes_When_Casemapped # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +24B6..24E9 ; Changes_When_Casemapped # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C70 ; Changes_When_Casemapped # L& [113] GLAGOLITIC CAPITAL LETTER AZU..LATIN CAPITAL LETTER TURNED ALPHA +2C72..2C73 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER W WITH HOOK..LATIN SMALL LETTER W WITH HOOK +2C75..2C76 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER HALF H..LATIN SMALL LETTER HALF H +2C7E..2CE3 ; Changes_When_Casemapped # L& [102] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SMALL LETTER OLD NUBIAN WAU +2CEB..2CEE ; Changes_When_Casemapped # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Changes_When_Casemapped # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Changes_When_Casemapped # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL 
LETTER HOE +2D27 ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER AEN +A640..A66D ; Changes_When_Casemapped # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A680..A69B ; Changes_When_Casemapped # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A722..A72F ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CUATRILLO WITH COMMA +A732..A76F ; Changes_When_Casemapped # L& [62] LATIN CAPITAL LETTER AA..LATIN SMALL LETTER CON +A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR D..LATIN SMALL LETTER INSULAR T +A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H +A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK +A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7CD ; Changes_When_Casemapped # L& [30] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D6..A7DC ; Changes_When_Casemapped # L& [7] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI +AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Casemapped # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Casemapped # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN CAPITAL LETTER 
A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10400..1044F ; Changes_When_Casemapped # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +104B0..104D3 ; Changes_When_Casemapped # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Changes_When_Casemapped # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10570..1057A ; Changes_When_Casemapped # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Changes_When_Casemapped # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Changes_When_Casemapped # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Changes_When_Casemapped # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Changes_When_Casemapped # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Changes_When_Casemapped # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Changes_When_Casemapped # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Changes_When_Casemapped # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10C80..10CB2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D50..10D65 ; Changes_When_Casemapped # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Changes_When_Casemapped # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL 
LETTER SHA + +# Total code points: 2981 + +# ================================================ + +# Derived Property: ID_Start +# Characters that can start an identifier. +# Generated from: +# Lu + Ll + Lt + Lm + Lo + Nl +# + Other_ID_Start +# - Pattern_Syntax +# - Pattern_White_Space +# NOTE: See UAX #31 for more information + +0041..005A ; ID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; ID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; ID_Start # Lo FEMININE ORDINAL INDICATOR +00B5 ; ID_Start # L& MICRO SIGN +00BA ; ID_Start # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; ID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; ID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; ID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; ID_Start # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; ID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; ID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; ID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; ID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; ID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; ID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; ID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; ID_Start # Lm MODIFIER LETTER VOICING +02EE ; ID_Start # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; ID_Start # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; ID_Start # Lm GREEK NUMERAL SIGN +0376..0377 ; ID_Start # L& 
[2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; ID_Start # Lm GREEK YPOGEGRAMMENI +037B..037D ; ID_Start # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; ID_Start # L& GREEK CAPITAL LETTER YOT +0386 ; ID_Start # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; ID_Start # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; ID_Start # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; ID_Start # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; ID_Start # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; ID_Start # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; ID_Start # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; ID_Start # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; ID_Start # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; ID_Start # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +05D0..05EA ; ID_Start # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; ID_Start # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0620..063F ; ID_Start # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; ID_Start # Lm ARABIC TATWEEL +0641..064A ; ID_Start # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; ID_Start # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; ID_Start # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; ID_Start # Lo ARABIC LETTER AE +06E5..06E6 ; ID_Start # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; ID_Start # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; ID_Start # Lo [3] ARABIC LETTER SHEEN WITH 
DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; ID_Start # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; ID_Start # Lo SYRIAC LETTER ALAPH +0712..072F ; ID_Start # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; ID_Start # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; ID_Start # Lo THAANA LETTER NAA +07CA..07EA ; ID_Start # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; ID_Start # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; ID_Start # Lm NKO LAJANYALAN +0800..0815 ; ID_Start # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; ID_Start # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; ID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; ID_Start # Lm SAMARITAN MODIFIER LETTER I +0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; ID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; ID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; ID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; ID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; ID_Start # Lm ARABIC SMALL FARSI YEH +0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; ID_Start # Lo DEVANAGARI SIGN AVAGRAHA +0950 ; ID_Start # Lo DEVANAGARI OM +0958..0961 ; ID_Start # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0971 ; ID_Start # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; ID_Start # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0985..098C ; ID_Start # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; ID_Start # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; ID_Start # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; ID_Start # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; ID_Start # Lo BENGALI LETTER LA +09B6..09B9 ; ID_Start # Lo 
[4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; ID_Start # Lo BENGALI SIGN AVAGRAHA +09CE ; ID_Start # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; ID_Start # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; ID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; ID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; ID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +0A05..0A0A ; ID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; ID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; ID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; ID_Start # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; ID_Start # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; ID_Start # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; ID_Start # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A59..0A5C ; ID_Start # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; ID_Start # Lo GURMUKHI LETTER FA +0A72..0A74 ; ID_Start # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A85..0A8D ; ID_Start # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; ID_Start # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; ID_Start # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; ID_Start # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; ID_Start # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; ID_Start # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; ID_Start # Lo GUJARATI SIGN AVAGRAHA +0AD0 ; ID_Start # Lo GUJARATI OM +0AE0..0AE1 ; ID_Start # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF9 ; ID_Start # Lo GUJARATI LETTER ZHA +0B05..0B0C ; ID_Start # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; ID_Start # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; ID_Start # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; ID_Start # Lo [7] 
ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; ID_Start # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; ID_Start # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; ID_Start # Lo ORIYA SIGN AVAGRAHA +0B5C..0B5D ; ID_Start # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; ID_Start # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; ID_Start # Lo ORIYA LETTER WA +0B83 ; ID_Start # Lo TAMIL SIGN VISARGA +0B85..0B8A ; ID_Start # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; ID_Start # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; ID_Start # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; ID_Start # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; ID_Start # Lo TAMIL LETTER JA +0B9E..0B9F ; ID_Start # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; ID_Start # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; ID_Start # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; ID_Start # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BD0 ; ID_Start # Lo TAMIL OM +0C05..0C0C ; ID_Start # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; ID_Start # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; ID_Start # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA +0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; ID_Start # Lo KANNADA SIGN SPACING CANDRABINDU +0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; ID_Start # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; ID_Start # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA +0CDD..0CDE 
; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; ID_Start # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; ID_Start # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; ID_Start # Lo MALAYALAM SIGN AVAGRAHA +0D4E ; ID_Start # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; ID_Start # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D5F..0D61 ; ID_Start # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D7A..0D7F ; ID_Start # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D85..0D96 ; ID_Start # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; ID_Start # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; ID_Start # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; ID_Start # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; ID_Start # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0E01..0E30 ; ID_Start # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; ID_Start # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E40..0E45 ; ID_Start # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; ID_Start # Lm THAI CHARACTER MAIYAMOK +0E81..0E82 ; ID_Start # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; ID_Start # Lo LAO LETTER KHO TAM +0E86..0E8A ; ID_Start # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; ID_Start # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; ID_Start # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; ID_Start # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; ID_Start # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; ID_Start # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; 
ID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; ID_Start # Lm LAO KO LA +0EDC..0EDF ; ID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; ID_Start # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; ID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; ID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F88..0F8C ; ID_Start # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +1000..102A ; ID_Start # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +103F ; ID_Start # Lo MYANMAR LETTER GREAT SA +1050..1055 ; ID_Start # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +105A..105D ; ID_Start # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; ID_Start # Lo MYANMAR LETTER SGAW KAREN SHA +1065..1066 ; ID_Start # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +106E..1070 ; ID_Start # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; ID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +108E ; ID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA +10A0..10C5 ; ID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Start # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; ID_Start # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; ID_Start # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; ID_Start # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; ID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; ID_Start # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; ID_Start # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE 
+1290..12B0 ; ID_Start # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; ID_Start # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; ID_Start # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; ID_Start # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; ID_Start # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; ID_Start # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; ID_Start # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; ID_Start # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; ID_Start # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; ID_Start # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; ID_Start # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; ID_Start # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; ID_Start # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; ID_Start # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; ID_Start # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; ID_Start # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; ID_Start # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; ID_Start # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1740..1751 ; ID_Start # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; ID_Start # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; ID_Start # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; ID_Start # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17D7 ; ID_Start # Lm KHMER SIGN LEK TOO +17DC ; ID_Start # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; ID_Start # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; 
ID_Start # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; ID_Start # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; ID_Start # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; ID_Start # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; ID_Start # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; ID_Start # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; ID_Start # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1950..196D ; ID_Start # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; ID_Start # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; ID_Start # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; ID_Start # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; ID_Start # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A20..1A54 ; ID_Start # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1AA7 ; ID_Start # Lm TAI THAM SIGN MAI YAMOK +1B05..1B33 ; ID_Start # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B45..1B4C ; ID_Start # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B83..1BA0 ; ID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BAE..1BAF ; ID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; ID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1C00..1C23 ; ID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C8A ; ID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; ID_Start # L& 
[43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; ID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; ID_Start # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; ID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; ID_Start # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; ID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; ID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; ID_Start # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; ID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; ID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; ID_Start # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; ID_Start # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; ID_Start # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; ID_Start # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; ID_Start # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; ID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; ID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; ID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; ID_Start # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH 
OXIA +1F80..1FB4 ; ID_Start # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; ID_Start # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; ID_Start # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; ID_Start # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; ID_Start # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; ID_Start # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; ID_Start # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; ID_Start # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; ID_Start # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; ID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; ID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; ID_Start # L& DOUBLE-STRUCK CAPITAL C +2107 ; ID_Start # L& EULER CONSTANT +210A..2113 ; ID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; ID_Start # L& DOUBLE-STRUCK CAPITAL N +2118 ; ID_Start # Sm SCRIPT CAPITAL P +2119..211D ; ID_Start # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; ID_Start # L& DOUBLE-STRUCK CAPITAL Z +2126 ; ID_Start # L& OHM SIGN +2128 ; ID_Start # L& BLACK-LETTER CAPITAL Z +212A..212D ; ID_Start # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; ID_Start # So ESTIMATED SYMBOL +212F..2134 ; ID_Start # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; ID_Start # 
Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; ID_Start # L& INFORMATION SOURCE +213C..213F ; ID_Start # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; ID_Start # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; ID_Start # L& TURNED SMALL F +2160..2182 ; ID_Start # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; ID_Start # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; ID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; ID_Start # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; ID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; ID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; ID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; ID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; ID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; ID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 
+2DD8..2DDE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; ID_Start # Lm IDEOGRAPHIC ITERATION MARK +3006 ; ID_Start # Lo IDEOGRAPHIC CLOSING MARK +3007 ; ID_Start # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; ID_Start # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3031..3035 ; ID_Start # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; ID_Start # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; ID_Start # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; ID_Start # Lo MASU MARK +3041..3096 ; ID_Start # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309B..309C ; ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; ID_Start # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; ID_Start # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; ID_Start # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; ID_Start # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; ID_Start # Lo KATAKANA DIGRAPH KOTO +3105..312F ; ID_Start # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; ID_Start # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; ID_Start # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; ID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; ID_Start # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; ID_Start # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; ID_Start # Lm YI SYLLABLE WU +A016..A48C ; ID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; ID_Start # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; ID_Start # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; ID_Start # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; ID_Start # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; ID_Start # Lo [16] VAI SYLLABLE NDOLE 
FA..VAI SYMBOL JONG +A62A..A62B ; ID_Start # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; ID_Start # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; ID_Start # Lo CYRILLIC LETTER MULTIOCULAR O +A67F ; ID_Start # Lm CYRILLIC PAYEROK +A680..A69B ; ID_Start # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; ID_Start # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; ID_Start # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; ID_Start # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A717..A71F ; ID_Start # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; ID_Start # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; ID_Start # Lm MODIFIER LETTER US +A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; ID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; ID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; ID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; ID_Start # 
Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; ID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; ID_Start # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; ID_Start # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A840..A873 ; ID_Start # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A882..A8B3 ; ID_Start # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8F2..A8F7 ; ID_Start # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; ID_Start # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; ID_Start # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A90A..A925 ; ID_Start # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A930..A946 ; ID_Start # Lo [23] REJANG LETTER KA..REJANG LETTER A +A960..A97C ; ID_Start # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A984..A9B2 ; ID_Start # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9CF ; ID_Start # Lm JAVANESE PANGRANGKEP +A9E0..A9E4 ; ID_Start # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; ID_Start # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; ID_Start # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; ID_Start # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; ID_Start # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA40..AA42 ; ID_Start # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; ID_Start # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA60..AA6F ; ID_Start # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; ID_Start # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; ID_Start # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; ID_Start # Lo MYANMAR LETTER AITON RA +AA7E..AAAF ; ID_Start # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; ID_Start # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; 
ID_Start # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; ID_Start # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; ID_Start # Lo TAI VIET TONE MAI NUENG +AAC2 ; ID_Start # Lo TAI VIET TONE MAI SONG +AADB..AADC ; ID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; ID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AB01..AB06 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; ID_Start # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; ID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; ID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; ID_Start # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; ID_Start # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; ID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; ID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +AC00..D7A3 ; ID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; ID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; ID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; ID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; ID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; ID_Start # L& [7] LATIN SMALL LIGATURE 
FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; ID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; ID_Start # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; ID_Start # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; ID_Start # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; ID_Start # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; ID_Start # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; ID_Start # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; ID_Start # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; ID_Start # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; ID_Start # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; ID_Start # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; ID_Start # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; ID_Start # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; ID_Start # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; ID_Start # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; ID_Start # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; ID_Start # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; ID_Start # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; ID_Start # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; ID_Start # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA 
LETTER N +FF9E..FF9F ; ID_Start # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; ID_Start # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; ID_Start # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; ID_Start # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; ID_Start # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; ID_Start # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; ID_Start # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; ID_Start # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; ID_Start # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; ID_Start # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; ID_Start # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; ID_Start # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; ID_Start # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; ID_Start # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; ID_Start # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; ID_Start # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; ID_Start # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; ID_Start # Nl GOTHIC LETTER NINETY +10342..10349 ; ID_Start # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; ID_Start # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; ID_Start # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; ID_Start # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; ID_Start # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; ID_Start # Lo 
[8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; ID_Start # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; ID_Start # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; ID_Start # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; ID_Start # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; ID_Start # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; ID_Start # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; ID_Start # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; ID_Start # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; ID_Start # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; ID_Start # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; ID_Start # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; ID_Start # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; ID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; ID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; ID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; ID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; ID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; ID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; ID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; ID_Start # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; ID_Start # Lo [6] CYPRIOT SYLLABLE 
A..CYPRIOT SYLLABLE JA +10808 ; ID_Start # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; ID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; ID_Start # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; ID_Start # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; ID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; ID_Start # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; ID_Start # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; ID_Start # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; ID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; ID_Start # Lo KHAROSHTHI LETTER A +10A10..10A13 ; ID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; ID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; ID_Start # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; ID_Start # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; ID_Start # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; ID_Start # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; ID_Start # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; ID_Start # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; ID_Start # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; ID_Start # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; ID_Start # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW 
+10C00..10C48 ; ID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; ID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; ID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; ID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; ID_Start # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ID_Start # Lm GARAY VOWEL LENGTH MARK +10D4F ; ID_Start # Lo GARAY SUKUN +10D50..10D65 ; ID_Start # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; ID_Start # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ID_Start # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; ID_Start # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; ID_Start # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; ID_Start # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11003..11037 ; ID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; ID_Start # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; ID_Start # Lo BRAHMI LETTER OLD TAMIL LLA +11083..110AF ; ID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; ID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ID_Start # Lo [36] CHAKMA 
LETTER AA..CHAKMA LETTER HAA +11144 ; ID_Start # Lo CHAKMA LETTER LHAA +11147 ; ID_Start # Lo CHAKMA LETTER VAA +11150..11172 ; ID_Start # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; ID_Start # Lo MAHAJANI LIGATURE SHRI +11183..111B2 ; ID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111DA ; ID_Start # Lo SHARADA EKAM +111DC ; ID_Start # Lo SHARADA HEADSTROKE +11200..11211 ; ID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; ID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; ID_Start # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11280..11286 ; ID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; ID_Start # Lo MULTANI LETTER GHA +1128A..1128D ; ID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; ID_Start # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; ID_Start # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; ID_Start # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +11305..1130C ; ID_Start # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; ID_Start # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; ID_Start # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; ID_Start # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; ID_Start # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; ID_Start # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; ID_Start # Lo GRANTHA SIGN AVAGRAHA +11350 ; ID_Start # Lo GRANTHA OM +1135D..11361 ; ID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; ID_Start # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ID_Start # Lo TULU-TIGALARI LETTER EE +1138E ; ID_Start # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ID_Start # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ID_Start # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; 
ID_Start # Lo TULU-TIGALARI REPHA +113D3 ; ID_Start # Lo TULU-TIGALARI SIGN PLUTA +11400..11434 ; ID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; ID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; ID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; ID_Start # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114C4..114C5 ; ID_Start # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; ID_Start # Lo TIRHUTA OM +11580..115AE ; ID_Start # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115D8..115DB ; ID_Start # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; ID_Start # Lo [48] MODI LETTER A..MODI LETTER LLA +11644 ; ID_Start # Lo MODI SIGN HUVA +11680..116AA ; ID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116B8 ; ID_Start # Lo TAKRI LETTER ARCHAIC KHA +11700..1171A ; ID_Start # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11740..11746 ; ID_Start # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; ID_Start # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +118A0..118DF ; ID_Start # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; ID_Start # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; ID_Start # Lo DIVES AKURU LETTER O +1190C..11913 ; ID_Start # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; ID_Start # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; ID_Start # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +1193F ; ID_Start # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; ID_Start # Lo DIVES AKURU INITIAL RA +119A0..119A7 ; ID_Start # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; ID_Start # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119E1 ; ID_Start # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; ID_Start # Lo NANDINAGARI HEADSTROKE +11A00 ; ID_Start # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; ID_Start # Lo [40] ZANABAZAR SQUARE LETTER 
KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; ID_Start # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; ID_Start # Lo SOYOMBO LETTER A +11A5C..11A89 ; ID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A9D ; ID_Start # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; ID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11C00..11C08 ; ID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; ID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; ID_Start # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; ID_Start # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; ID_Start # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; ID_Start # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; ID_Start # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; ID_Start # Lo MASARAM GONDI REPHA +11D60..11D65 ; ID_Start # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; ID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; ID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D98 ; ID_Start # Lo GUNJALA GONDI OM +11EE0..11EF2 ; ID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; ID_Start # Lo KAWI SIGN REPHA +11F04..11F10 ; ID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; ID_Start # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11FB0 ; ID_Start # Lo LISU LETTER YHA +12000..12399 ; ID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; ID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; ID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; ID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; ID_Start # Lo [1072] 
EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; ID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; ID_Start # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; ID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ID_Start # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; ID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; ID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; ID_Start # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; ID_Start # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; ID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; ID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; ID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; ID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; ID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; ID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16E40..16E7F ; ID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; ID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK +17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; ID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT 
IDEOGRAPH-18D08 +1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; ID_Start # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; ID_Start # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; ID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; ID_Start # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; ID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; ID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; ID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; ID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; ID_Start # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; ID_Start # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1D400..1D454 ; ID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; ID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; ID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; ID_Start # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; ID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; ID_Start # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; ID_Start # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; ID_Start # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; ID_Start # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; ID_Start # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B 
+1D507..1D50A ; ID_Start # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; ID_Start # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; ID_Start # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; ID_Start # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; ID_Start # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; ID_Start # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; ID_Start # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; ID_Start # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; ID_Start # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; ID_Start # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; ID_Start # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; ID_Start # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; ID_Start # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; ID_Start # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; ID_Start # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; ID_Start # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; ID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL 
OMEGA +1D7C4..1D7CB ; ID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; ID_Start # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; ID_Start # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; ID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; ID_Start # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; ID_Start # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; ID_Start # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; ID_Start # Lo OL ONAL SIGN HODDOND +1E7E0..1E7E6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; ID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; ID_Start # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; ID_Start # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; ID_Start # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; ID_Start # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; ID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC 
MATHEMATICAL DAL +1EE05..1EE1F ; ID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Start # Lo 
ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +20000..2A6DF ; ID_Start # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; ID_Start # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 141269 + +# ================================================ + +# Derived Property: ID_Continue +# Characters that can continue an identifier. 
+# Generated from: +# ID_Start +# + Mn + Mc + Nd + Pc +# + Other_ID_Continue +# - Pattern_Syntax +# - Pattern_White_Space +# NOTE: See UAX #31 for more information + +0030..0039 ; ID_Continue # Nd [10] DIGIT ZERO..DIGIT NINE +0041..005A ; ID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005F ; ID_Continue # Pc LOW LINE +0061..007A ; ID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; ID_Continue # Lo FEMININE ORDINAL INDICATOR +00B5 ; ID_Continue # L& MICRO SIGN +00B7 ; ID_Continue # Po MIDDLE DOT +00BA ; ID_Continue # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; ID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; ID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; ID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; ID_Continue # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; ID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; ID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; ID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; ID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; ID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; ID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; ID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; ID_Continue # Lm MODIFIER LETTER VOICING +02EE ; ID_Continue # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0300..036F ; ID_Continue # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0370..0373 ; ID_Continue # L& [4] GREEK CAPITAL 
LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; ID_Continue # Lm GREEK NUMERAL SIGN +0376..0377 ; ID_Continue # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; ID_Continue # Lm GREEK YPOGEGRAMMENI +037B..037D ; ID_Continue # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; ID_Continue # L& GREEK CAPITAL LETTER YOT +0386 ; ID_Continue # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; ID_Continue # Po GREEK ANO TELEIA +0388..038A ; ID_Continue # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; ID_Continue # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; ID_Continue # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; ID_Continue # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; ID_Continue # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0483..0487 ; ID_Continue # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +048A..052F ; ID_Continue # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; ID_Continue # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; ID_Continue # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; ID_Continue # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0591..05BD ; ID_Continue # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; ID_Continue # Mn HEBREW POINT RAFE +05C1..05C2 ; ID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; ID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; ID_Continue # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; ID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; ID_Continue # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0610..061A ; ID_Continue # Mn [11] 
ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +0620..063F ; ID_Continue # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; ID_Continue # Lm ARABIC TATWEEL +0641..064A ; ID_Continue # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..065F ; ID_Continue # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0660..0669 ; ID_Continue # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066E..066F ; ID_Continue # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; ID_Continue # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; ID_Continue # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; ID_Continue # Lo ARABIC LETTER AE +06D6..06DC ; ID_Continue # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; ID_Continue # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; ID_Continue # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; ID_Continue # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; ID_Continue # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; ID_Continue # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; ID_Continue # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; ID_Continue # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; ID_Continue # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; ID_Continue # Lo SYRIAC LETTER ALAPH +0711 ; ID_Continue # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; ID_Continue # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; ID_Continue # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..07A5 ; ID_Continue # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07A6..07B0 ; ID_Continue # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; ID_Continue # Lo THAANA 
LETTER NAA +07C0..07C9 ; ID_Continue # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; ID_Continue # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; ID_Continue # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; ID_Continue # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; ID_Continue # Lm NKO LAJANYALAN +07FD ; ID_Continue # Mn NKO DANTAYALAN +0800..0815 ; ID_Continue # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; ID_Continue # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; ID_Continue # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; ID_Continue # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; ID_Continue # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; ID_Continue # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; ID_Continue # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; ID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0840..0858 ; ID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; ID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; ID_Continue # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +0903 ; ID_Continue # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; ID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 
+093A ; ID_Continue # Mn DEVANAGARI VOWEL SIGN OE +093B ; ID_Continue # Mc DEVANAGARI VOWEL SIGN OOE +093C ; ID_Continue # Mn DEVANAGARI SIGN NUKTA +093D ; ID_Continue # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; ID_Continue # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; ID_Continue # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; ID_Continue # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; ID_Continue # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; ID_Continue # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; ID_Continue # Lo DEVANAGARI OM +0951..0957 ; ID_Continue # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; ID_Continue # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; ID_Continue # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; ID_Continue # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0971 ; ID_Continue # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; ID_Continue # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0981 ; ID_Continue # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; ID_Continue # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; ID_Continue # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; ID_Continue # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; ID_Continue # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; ID_Continue # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; ID_Continue # Lo BENGALI LETTER LA +09B6..09B9 ; ID_Continue # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; ID_Continue # Mn BENGALI SIGN NUKTA +09BD ; ID_Continue # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; ID_Continue # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; ID_Continue # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; ID_Continue # Mc [2] BENGALI VOWEL SIGN 
E..BENGALI VOWEL SIGN AI +09CB..09CC ; ID_Continue # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; ID_Continue # Mn BENGALI SIGN VIRAMA +09CE ; ID_Continue # Lo BENGALI LETTER KHANDA TA +09D7 ; ID_Continue # Mc BENGALI AU LENGTH MARK +09DC..09DD ; ID_Continue # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; ID_Continue # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; ID_Continue # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; ID_Continue # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; ID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; ID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA +09FE ; ID_Continue # Mn BENGALI SANDHI MARK +0A01..0A02 ; ID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; ID_Continue # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; ID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; ID_Continue # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; ID_Continue # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; ID_Continue # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; ID_Continue # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; ID_Continue # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; ID_Continue # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; ID_Continue # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; ID_Continue # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; ID_Continue # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; ID_Continue # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; ID_Continue # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; ID_Continue # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; ID_Continue # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; ID_Continue # Lo GURMUKHI LETTER FA +0A66..0A6F ; ID_Continue # Nd 
[10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; ID_Continue # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; ID_Continue # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; ID_Continue # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; ID_Continue # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; ID_Continue # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; ID_Continue # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; ID_Continue # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; ID_Continue # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; ID_Continue # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; ID_Continue # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; ID_Continue # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; ID_Continue # Mn GUJARATI SIGN NUKTA +0ABD ; ID_Continue # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; ID_Continue # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; ID_Continue # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; ID_Continue # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; ID_Continue # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; ID_Continue # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; ID_Continue # Mn GUJARATI SIGN VIRAMA +0AD0 ; ID_Continue # Lo GUJARATI OM +0AE0..0AE1 ; ID_Continue # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; ID_Continue # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; ID_Continue # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF9 ; ID_Continue # Lo GUJARATI LETTER ZHA +0AFA..0AFF ; ID_Continue # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; ID_Continue # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; ID_Continue # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; ID_Continue # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; ID_Continue # 
Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; ID_Continue # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; ID_Continue # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; ID_Continue # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; ID_Continue # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; ID_Continue # Mn ORIYA SIGN NUKTA +0B3D ; ID_Continue # Lo ORIYA SIGN AVAGRAHA +0B3E ; ID_Continue # Mc ORIYA VOWEL SIGN AA +0B3F ; ID_Continue # Mn ORIYA VOWEL SIGN I +0B40 ; ID_Continue # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; ID_Continue # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; ID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; ID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; ID_Continue # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; ID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; ID_Continue # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; ID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; ID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; ID_Continue # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; ID_Continue # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B71 ; ID_Continue # Lo ORIYA LETTER WA +0B82 ; ID_Continue # Mn TAMIL SIGN ANUSVARA +0B83 ; ID_Continue # Lo TAMIL SIGN VISARGA +0B85..0B8A ; ID_Continue # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; ID_Continue # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; ID_Continue # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; ID_Continue # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; ID_Continue # Lo TAMIL LETTER JA +0B9E..0B9F ; ID_Continue # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; ID_Continue # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; ID_Continue # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; ID_Continue # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; ID_Continue # Mc [2] TAMIL 
VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; ID_Continue # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; ID_Continue # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; ID_Continue # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; ID_Continue # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; ID_Continue # Mn TAMIL SIGN VIRAMA +0BD0 ; ID_Continue # Lo TAMIL OM +0BD7 ; ID_Continue # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; ID_Continue # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C00 ; ID_Continue # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; ID_Continue # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; ID_Continue # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; ID_Continue # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; ID_Continue # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; ID_Continue # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; ID_Continue # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C ; ID_Continue # Mn TELUGU SIGN NUKTA +0C3D ; ID_Continue # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; ID_Continue # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; ID_Continue # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; ID_Continue # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C80 ; ID_Continue # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; ID_Continue # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; ID_Continue # Mc [2] 
KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; ID_Continue # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; ID_Continue # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; ID_Continue # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; ID_Continue # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; ID_Continue # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; ID_Continue # Mn KANNADA SIGN NUKTA +0CBD ; ID_Continue # Lo KANNADA SIGN AVAGRAHA +0CBE ; ID_Continue # Mc KANNADA VOWEL SIGN AA +0CBF ; ID_Continue # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; ID_Continue # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; ID_Continue # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; ID_Continue # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; ID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; ID_Continue # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01 ; ID_Continue # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; ID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; ID_Continue # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; ID_Continue # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; ID_Continue # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C ; 
ID_Continue # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D ; ID_Continue # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; ID_Continue # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; ID_Continue # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; ID_Continue # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; ID_Continue # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; ID_Continue # Mn MALAYALAM SIGN VIRAMA +0D4E ; ID_Continue # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; ID_Continue # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; ID_Continue # Mc MALAYALAM AU LENGTH MARK +0D5F..0D61 ; ID_Continue # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; ID_Continue # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; ID_Continue # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D7A..0D7F ; ID_Continue # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; ID_Continue # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; ID_Continue # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; ID_Continue # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; ID_Continue # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; ID_Continue # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; ID_Continue # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; ID_Continue # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; ID_Continue # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; ID_Continue # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; ID_Continue # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; ID_Continue # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; ID_Continue # Mc [8] SINHALA VOWEL SIGN 
GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; ID_Continue # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; ID_Continue # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E01..0E30 ; ID_Continue # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; ID_Continue # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; ID_Continue # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; ID_Continue # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; ID_Continue # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; ID_Continue # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; ID_Continue # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E50..0E59 ; ID_Continue # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E81..0E82 ; ID_Continue # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; ID_Continue # Lo LAO LETTER KHO TAM +0E86..0E8A ; ID_Continue # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; ID_Continue # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; ID_Continue # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; ID_Continue # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; ID_Continue # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; ID_Continue # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC ; ID_Continue # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD ; ID_Continue # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; ID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; ID_Continue # Lm LAO KO LA +0EC8..0ECE ; ID_Continue # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0ED0..0ED9 ; ID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; ID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; ID_Continue # Lo TIBETAN SYLLABLE OM +0F18..0F19 ; ID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F20..0F29 ; ID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F35 ; ID_Continue # 
Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; ID_Continue # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; ID_Continue # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; ID_Continue # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; ID_Continue # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; ID_Continue # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; ID_Continue # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; ID_Continue # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; ID_Continue # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; ID_Continue # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C ; ID_Continue # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; ID_Continue # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; ID_Continue # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; ID_Continue # Mn TIBETAN SYMBOL PADMA GDAN +1000..102A ; ID_Continue # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; ID_Continue # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; ID_Continue # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; ID_Continue # Mc MYANMAR VOWEL SIGN E +1032..1037 ; ID_Continue # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; ID_Continue # Mc MYANMAR SIGN VISARGA +1039..103A ; ID_Continue # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; ID_Continue # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; ID_Continue # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; ID_Continue # Lo MYANMAR LETTER GREAT SA +1040..1049 ; ID_Continue # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +1050..1055 ; ID_Continue # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; ID_Continue # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL 
SIGN VOCALIC RR +1058..1059 ; ID_Continue # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; ID_Continue # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; ID_Continue # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; ID_Continue # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; ID_Continue # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; ID_Continue # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; ID_Continue # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; ID_Continue # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; ID_Continue # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; ID_Continue # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; ID_Continue # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; ID_Continue # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; ID_Continue # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; ID_Continue # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; ID_Continue # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; ID_Continue # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; ID_Continue # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; ID_Continue # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; ID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; ID_Continue # Mn MYANMAR VOWEL SIGN AITON AI +10A0..10C5 ; ID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Continue # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; ID_Continue # L& [43] GEORGIAN LETTER AN..GEORGIAN 
LETTER AIN +10FC ; ID_Continue # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; ID_Continue # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; ID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; ID_Continue # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; ID_Continue # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; ID_Continue # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; ID_Continue # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; ID_Continue # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; ID_Continue # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; ID_Continue # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F ; ID_Continue # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1369..1371 ; ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +1380..138F ; ID_Continue # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; ID_Continue # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; ID_Continue # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; ID_Continue # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; ID_Continue # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN 
SYLLABICS BLACKFOOT W +1681..169A ; ID_Continue # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; ID_Continue # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; ID_Continue # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; ID_Continue # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; ID_Continue # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714 ; ID_Continue # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; ID_Continue # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; ID_Continue # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1732..1733 ; ID_Continue # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; ID_Continue # Mc HANUNOO SIGN PAMUDPOD +1740..1751 ; ID_Continue # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; ID_Continue # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; ID_Continue # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; ID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; ID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; ID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; ID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; ID_Continue # Mc KHMER VOWEL SIGN AA +17B7..17BD ; ID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; ID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; ID_Continue # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; ID_Continue # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; ID_Continue # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D7 ; ID_Continue # Lm KHMER SIGN LEK TOO +17DC ; ID_Continue # Lo KHMER SIGN AVAKRAHASANYA +17DD ; ID_Continue # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; ID_Continue # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +180B..180D ; ID_Continue # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR 
THREE +180F ; ID_Continue # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819 ; ID_Continue # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; ID_Continue # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; ID_Continue # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; ID_Continue # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; ID_Continue # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; ID_Continue # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; ID_Continue # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; ID_Continue # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; ID_Continue # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; ID_Continue # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; ID_Continue # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; ID_Continue # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; ID_Continue # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; ID_Continue # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; ID_Continue # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; ID_Continue # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; ID_Continue # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; ID_Continue # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; ID_Continue # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1946..194F ; ID_Continue # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; ID_Continue # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; ID_Continue # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; ID_Continue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; ID_Continue # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW 
TAI LUE TONE MARK-2 +19D0..19D9 ; ID_Continue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; ID_Continue # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; ID_Continue # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; ID_Continue # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; ID_Continue # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; ID_Continue # Mn BUGINESE VOWEL SIGN AE +1A20..1A54 ; ID_Continue # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; ID_Continue # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; ID_Continue # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; ID_Continue # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; ID_Continue # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; ID_Continue # Mn TAI THAM SIGN SAKOT +1A61 ; ID_Continue # Mc TAI THAM VOWEL SIGN A +1A62 ; ID_Continue # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; ID_Continue # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; ID_Continue # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; ID_Continue # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; ID_Continue # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; ID_Continue # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; ID_Continue # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK +1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; ID_Continue # Mc BALINESE SIGN BISAH +1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA 
+1B34 ; ID_Continue # Mn BALINESE SIGN REREKAN +1B35 ; ID_Continue # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; ID_Continue # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; ID_Continue # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; ID_Continue # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; ID_Continue # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; ID_Continue # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; ID_Continue # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; ID_Continue # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; ID_Continue # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B6B..1B73 ; ID_Continue # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; ID_Continue # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; ID_Continue # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; ID_Continue # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; ID_Continue # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; ID_Continue # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; ID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; ID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; ID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; ID_Continue # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; ID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; ID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; ID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE6 ; ID_Continue # Mn BATAK SIGN TOMPI +1BE7 ; ID_Continue # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; ID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; ID_Continue # Mc [3] BATAK VOWEL SIGN I..BATAK 
VOWEL SIGN O +1BED ; ID_Continue # Mn BATAK VOWEL SIGN KARO O +1BEE ; ID_Continue # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; ID_Continue # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; ID_Continue # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C00..1C23 ; ID_Continue # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; ID_Continue # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; ID_Continue # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; ID_Continue # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; ID_Continue # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C40..1C49 ; ID_Continue # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; ID_Continue # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; ID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; ID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; ID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C8A ; ID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; ID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; ID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CD0..1CD2 ; ID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; ID_Continue # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; ID_Continue # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; ID_Continue # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CE9..1CEC ; ID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CED ; ID_Continue # Mn VEDIC SIGN TIRYAK +1CEE..1CF3 ; ID_Continue # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; 
ID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; ID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; ID_Continue # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; ID_Continue # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1CFA ; ID_Continue # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; ID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; ID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; ID_Continue # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; ID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; ID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; ID_Continue # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1E00..1F15 ; ID_Continue # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; ID_Continue # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; ID_Continue # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; ID_Continue # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; ID_Continue # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; ID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; ID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; ID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; ID_Continue # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; ID_Continue # L& [53] GREEK SMALL LETTER 
ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; ID_Continue # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; ID_Continue # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; ID_Continue # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; ID_Continue # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; ID_Continue # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; ID_Continue # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; ID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; ID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; ID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200C..200D ; ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +203F..2040 ; ID_Continue # Pc [2] UNDERTIE..CHARACTER TIE +2054 ; ID_Continue # Pc INVERTED UNDERTIE +2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; ID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20D0..20DC ; ID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; ID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20F0 ; ID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2102 ; ID_Continue # L& DOUBLE-STRUCK CAPITAL C +2107 ; ID_Continue # L& EULER CONSTANT +210A..2113 ; ID_Continue # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; ID_Continue # L& DOUBLE-STRUCK CAPITAL N +2118 ; ID_Continue # 
Sm SCRIPT CAPITAL P +2119..211D ; ID_Continue # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; ID_Continue # L& DOUBLE-STRUCK CAPITAL Z +2126 ; ID_Continue # L& OHM SIGN +2128 ; ID_Continue # L& BLACK-LETTER CAPITAL Z +212A..212D ; ID_Continue # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; ID_Continue # So ESTIMATED SYMBOL +212F..2134 ; ID_Continue # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; ID_Continue # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; ID_Continue # L& INFORMATION SOURCE +213C..213F ; ID_Continue # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; ID_Continue # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; ID_Continue # L& TURNED SMALL F +2160..2182 ; ID_Continue # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; ID_Continue # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; ID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; ID_Continue # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; ID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; ID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; ID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; ID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; ID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; ID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; ID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D7F ; ID_Continue # Mn TIFINAGH 
CONSONANT JOINER +2D80..2D96 ; ID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; ID_Continue # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +3005 ; ID_Continue # Lm IDEOGRAPHIC ITERATION MARK +3006 ; ID_Continue # Lo IDEOGRAPHIC CLOSING MARK +3007 ; ID_Continue # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; ID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D ; ID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; ID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; ID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; ID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; ID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; ID_Continue # Lo MASU MARK +3041..3096 ; ID_Continue # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A ; ID_Continue # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; ID_Continue # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; ID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; ID_Continue # Lo HIRAGANA 
DIGRAPH YORI +30A1..30FA ; ID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; ID_Continue # Po KATAKANA MIDDLE DOT +30FC..30FE ; ID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; ID_Continue # Lo KATAKANA DIGRAPH KOTO +3105..312F ; ID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; ID_Continue # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; ID_Continue # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; ID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; ID_Continue # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; ID_Continue # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; ID_Continue # Lm YI SYLLABLE WU +A016..A48C ; ID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; ID_Continue # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; ID_Continue # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; ID_Continue # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; ID_Continue # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; ID_Continue # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; ID_Continue # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; ID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; ID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; ID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; ID_Continue # Mn COMBINING CYRILLIC VZMET +A674..A67D ; ID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67F ; ID_Continue # Lm CYRILLIC PAYEROK +A680..A69B ; ID_Continue # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; ID_Continue # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; ID_Continue # Mn [2] COMBINING CYRILLIC 
LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; ID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; ID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; ID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; ID_Continue # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; ID_Continue # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; ID_Continue # Lm MODIFIER LETTER US +A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; ID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; ID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; ID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; ID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; ID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A802 ; ID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; ID_Continue # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; ID_Continue # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; ID_Continue # Lo [4] SYLOTI 
NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; ID_Continue # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; ID_Continue # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; ID_Continue # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; ID_Continue # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; ID_Continue # Mc SYLOTI NAGRI VOWEL SIGN OO +A82C ; ID_Continue # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A840..A873 ; ID_Continue # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; ID_Continue # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; ID_Continue # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; ID_Continue # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; ID_Continue # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8D0..A8D9 ; ID_Continue # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1 ; ID_Continue # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; ID_Continue # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; ID_Continue # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; ID_Continue # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; ID_Continue # Mn DEVANAGARI VOWEL SIGN AY +A900..A909 ; ID_Continue # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; ID_Continue # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; ID_Continue # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A930..A946 ; ID_Continue # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; ID_Continue # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; ID_Continue # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A960..A97C ; ID_Continue # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; ID_Continue # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR 
+A983 ; ID_Continue # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; ID_Continue # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; ID_Continue # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; ID_Continue # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; ID_Continue # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; ID_Continue # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; ID_Continue # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; ID_Continue # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9CF ; ID_Continue # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; ID_Continue # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9E0..A9E4 ; ID_Continue # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; ID_Continue # Mn MYANMAR SIGN SHAN SAW +A9E6 ; ID_Continue # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; ID_Continue # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; ID_Continue # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; ID_Continue # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; ID_Continue # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; ID_Continue # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; ID_Continue # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; ID_Continue # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; ID_Continue # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; ID_Continue # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; ID_Continue # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; ID_Continue # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; ID_Continue # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; ID_Continue # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; ID_Continue # Mc CHAM CONSONANT SIGN FINAL H 
+AA50..AA59 ; ID_Continue # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA60..AA6F ; ID_Continue # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; ID_Continue # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; ID_Continue # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; ID_Continue # Lo MYANMAR LETTER AITON RA +AA7B ; ID_Continue # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; ID_Continue # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; ID_Continue # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; ID_Continue # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB0 ; ID_Continue # Mn TAI VIET MAI KANG +AAB1 ; ID_Continue # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; ID_Continue # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; ID_Continue # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; ID_Continue # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; ID_Continue # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; ID_Continue # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; ID_Continue # Lo TAI VIET TONE MAI NUENG +AAC1 ; ID_Continue # Mn TAI VIET TONE MAI THO +AAC2 ; ID_Continue # Lo TAI VIET TONE MAI SONG +AADB..AADC ; ID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; ID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; ID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; ID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; ID_Continue # Mn MEETEI MAYEK VIRAMA +AB01..AB06 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; 
ID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; ID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; ID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; ID_Continue # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; ID_Continue # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; ID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; ID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; ID_Continue # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; ID_Continue # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; ID_Continue # Mc MEETEI MAYEK LUM IYEK +ABED ; ID_Continue # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; ID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; ID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; ID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; ID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; ID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; ID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; ID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST 
+FB13..FB17 ; ID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; ID_Continue # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; ID_Continue # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; ID_Continue # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; ID_Continue # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; ID_Continue # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; ID_Continue # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; ID_Continue # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; ID_Continue # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; ID_Continue # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; ID_Continue # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; ID_Continue # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; ID_Continue # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; ID_Continue # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE00..FE0F ; ID_Continue # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; ID_Continue # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE33..FE34 ; ID_Continue # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; ID_Continue # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE70..FE74 ; ID_Continue # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; ID_Continue # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 
+FF10..FF19 ; ID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF3A ; ID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3F ; ID_Continue # Pc FULLWIDTH LOW LINE +FF41..FF5A ; ID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF65 ; ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F ; ID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; ID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; ID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; ID_Continue # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; ID_Continue # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; ID_Continue # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; ID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; ID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; ID_Continue # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; ID_Continue # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; ID_Continue # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; ID_Continue # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; ID_Continue # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; ID_Continue # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; ID_Continue # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; ID_Continue # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +101FD ; ID_Continue # Mn PHAISTOS DISC SIGN 
COMBINING OBLIQUE STROKE +10280..1029C ; ID_Continue # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; ID_Continue # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0 ; ID_Continue # Mn COPTIC EPACT THOUSANDS MARK +10300..1031F ; ID_Continue # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; ID_Continue # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; ID_Continue # Nl GOTHIC LETTER NINETY +10342..10349 ; ID_Continue # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; ID_Continue # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; ID_Continue # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; ID_Continue # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; ID_Continue # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; ID_Continue # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; ID_Continue # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; ID_Continue # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; ID_Continue # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; ID_Continue # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; ID_Continue # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; ID_Continue # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; ID_Continue # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; ID_Continue # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; ID_Continue # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; ID_Continue # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; ID_Continue # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; ID_Continue # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; ID_Continue # L& [2] VITHKUQI 
CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; ID_Continue # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; ID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; ID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; ID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; ID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; ID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; ID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; ID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; ID_Continue # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; ID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; ID_Continue # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; ID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; ID_Continue # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; ID_Continue # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; ID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; ID_Continue # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; ID_Continue # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; ID_Continue # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; ID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC 
CURSIVE LETTER DA +109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A +10A01..10A03 ; ID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; ID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; ID_Continue # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; ID_Continue # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; ID_Continue # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; ID_Continue # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; ID_Continue # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; ID_Continue # Mn KHAROSHTHI VIRAMA +10A60..10A7C ; ID_Continue # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; ID_Continue # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; ID_Continue # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; ID_Continue # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; ID_Continue # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10B00..10B35 ; ID_Continue # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; ID_Continue # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; ID_Continue # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; ID_Continue # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; ID_Continue # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; ID_Continue # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; ID_Continue # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; ID_Continue # Lo 
[36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; ID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; ID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; ID_Continue # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; ID_Continue # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ID_Continue # Lm GARAY VOWEL LENGTH MARK +10D4F ; ID_Continue # Lo GARAY SUKUN +10D50..10D65 ; ID_Continue # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; ID_Continue # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; ID_Continue # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ID_Continue # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10E80..10EA9 ; ID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; ID_Continue # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F70..10F81 ; ID_Continue # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85 ; ID_Continue # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10FB0..10FC4 ; ID_Continue # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; ID_Continue # Lo [23] ELYMAIC 
LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; ID_Continue # Mc BRAHMI SIGN CANDRABINDU +11001 ; ID_Continue # Mn BRAHMI SIGN ANUSVARA +11002 ; ID_Continue # Mc BRAHMI SIGN VISARGA +11003..11037 ; ID_Continue # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; ID_Continue # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11066..1106F ; ID_Continue # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; ID_Continue # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; ID_Continue # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; ID_Continue # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; ID_Continue # Lo BRAHMI LETTER OLD TAMIL LLA +1107F..11081 ; ID_Continue # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +11082 ; ID_Continue # Mc KAITHI SIGN VISARGA +11083..110AF ; ID_Continue # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; ID_Continue # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; ID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; ID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; ID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; ID_Continue # Mn KAITHI VOWEL SIGN VOCALIC R +110D0..110E8 ; ID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; ID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; ID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; ID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; ID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; ID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; ID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; ID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11144 ; ID_Continue # Lo CHAKMA LETTER LHAA +11145..11146 ; ID_Continue # Mc [2] 
CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; ID_Continue # Lo CHAKMA LETTER VAA +11150..11172 ; ID_Continue # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; ID_Continue # Mn MAHAJANI SIGN NUKTA +11176 ; ID_Continue # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; ID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; ID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; ID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; ID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; ID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; ID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; ID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C9..111CC ; ID_Continue # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CE ; ID_Continue # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; ID_Continue # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; ID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; ID_Continue # Lo SHARADA EKAM +111DC ; ID_Continue # Lo SHARADA HEADSTROKE +11200..11211 ; ID_Continue # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; ID_Continue # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; ID_Continue # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; ID_Continue # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; ID_Continue # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; ID_Continue # Mn KHOJKI SIGN ANUSVARA +11235 ; ID_Continue # Mc KHOJKI SIGN VIRAMA +11236..11237 ; ID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; ID_Continue # Mn KHOJKI SIGN SUKUN +1123F..11240 ; ID_Continue # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; ID_Continue # Mn KHOJKI VOWEL SIGN VOCALIC R +11280..11286 ; ID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; ID_Continue # Lo MULTANI LETTER GHA 
+1128A..1128D ; ID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; ID_Continue # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; ID_Continue # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; ID_Continue # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; ID_Continue # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; ID_Continue # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; ID_Continue # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; ID_Continue # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301 ; ID_Continue # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; ID_Continue # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; ID_Continue # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; ID_Continue # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; ID_Continue # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; ID_Continue # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; ID_Continue # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; ID_Continue # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C ; ID_Continue # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D ; ID_Continue # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; ID_Continue # Mn GRANTHA VOWEL SIGN II +11341..11344 ; ID_Continue # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; ID_Continue # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; ID_Continue # Lo GRANTHA OM +11357 ; ID_Continue # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; ID_Continue # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL 
SIGN VOCALIC LL +11366..1136C ; ID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; ID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; ID_Continue # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; ID_Continue # Lo TULU-TIGALARI LETTER EE +1138E ; ID_Continue # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; ID_Continue # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; ID_Continue # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; ID_Continue # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; ID_Continue # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; ID_Continue # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; ID_Continue # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; ID_Continue # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; ID_Continue # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; ID_Continue # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; ID_Continue # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; ID_Continue # Mn TULU-TIGALARI CONJOINER +113D1 ; ID_Continue # Lo TULU-TIGALARI REPHA +113D2 ; ID_Continue # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; ID_Continue # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; ID_Continue # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11400..11434 ; ID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; ID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; ID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; ID_Continue # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; ID_Continue # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; ID_Continue # Mc NEWA SIGN VISARGA +11446 ; ID_Continue # Mn NEWA SIGN NUKTA +11447..1144A ; ID_Continue # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +11450..11459 ; 
ID_Continue # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145E ; ID_Continue # Mn NEWA SANDHI MARK +1145F..11461 ; ID_Continue # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; ID_Continue # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; ID_Continue # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; ID_Continue # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; ID_Continue # Mc TIRHUTA VOWEL SIGN E +114BA ; ID_Continue # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; ID_Continue # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; ID_Continue # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; ID_Continue # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; ID_Continue # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; ID_Continue # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; ID_Continue # Lo TIRHUTA OM +114D0..114D9 ; ID_Continue # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; ID_Continue # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; ID_Continue # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; ID_Continue # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; ID_Continue # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; ID_Continue # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; ID_Continue # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; ID_Continue # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115D8..115DB ; ID_Continue # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; ID_Continue # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; ID_Continue # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; ID_Continue # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; ID_Continue # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; ID_Continue # Mc 
[2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; ID_Continue # Mn MODI SIGN ANUSVARA +1163E ; ID_Continue # Mc MODI SIGN VISARGA +1163F..11640 ; ID_Continue # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11644 ; ID_Continue # Lo MODI SIGN HUVA +11650..11659 ; ID_Continue # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; ID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; ID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; ID_Continue # Mc TAKRI SIGN VISARGA +116AD ; ID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; ID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; ID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; ID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA +116B8 ; ID_Continue # Lo TAKRI LETTER ARCHAIC KHA +116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; ID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11700..1171A ; ID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D ; ID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; ID_Continue # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; ID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; ID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; ID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; ID_Continue # Mc AHOM VOWEL SIGN E +11727..1172B ; ID_Continue # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; ID_Continue # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +11740..11746 ; ID_Continue # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; ID_Continue # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; ID_Continue # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; ID_Continue # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; ID_Continue # Mc DOGRA SIGN VISARGA +11839..1183A ; ID_Continue # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA 
+118A0..118DF ; ID_Continue # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; ID_Continue # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118FF..11906 ; ID_Continue # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; ID_Continue # Lo DIVES AKURU LETTER O +1190C..11913 ; ID_Continue # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; ID_Continue # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; ID_Continue # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; ID_Continue # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; ID_Continue # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; ID_Continue # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; ID_Continue # Mc DIVES AKURU SIGN HALANTA +1193E ; ID_Continue # Mn DIVES AKURU VIRAMA +1193F ; ID_Continue # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; ID_Continue # Mc DIVES AKURU MEDIAL YA +11941 ; ID_Continue # Lo DIVES AKURU INITIAL RA +11942 ; ID_Continue # Mc DIVES AKURU MEDIAL RA +11943 ; ID_Continue # Mn DIVES AKURU SIGN NUKTA +11950..11959 ; ID_Continue # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; ID_Continue # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; ID_Continue # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; ID_Continue # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; ID_Continue # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; ID_Continue # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; ID_Continue # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; ID_Continue # Mn NANDINAGARI SIGN VIRAMA +119E1 ; ID_Continue # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; ID_Continue # Lo NANDINAGARI HEADSTROKE +119E4 ; ID_Continue # Mc NANDINAGARI VOWEL SIGN 
PRISHTHAMATRA E +11A00 ; ID_Continue # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; ID_Continue # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; ID_Continue # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; ID_Continue # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; ID_Continue # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; ID_Continue # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; ID_Continue # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; ID_Continue # Mn ZANABAZAR SQUARE SUBJOINER +11A50 ; ID_Continue # Lo SOYOMBO LETTER A +11A51..11A56 ; ID_Continue # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; ID_Continue # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; ID_Continue # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; ID_Continue # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; ID_Continue # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; ID_Continue # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; ID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; ID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; ID_Continue # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; ID_Continue # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; ID_Continue # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; ID_Continue # Mc 
BHAIKSUKI SIGN VISARGA +11C3F ; ID_Continue # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; ID_Continue # Lo BHAIKSUKI SIGN AVAGRAHA +11C50..11C59 ; ID_Continue # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C72..11C8F ; ID_Continue # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; ID_Continue # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; ID_Continue # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; ID_Continue # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; ID_Continue # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; ID_Continue # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; ID_Continue # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; ID_Continue # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; ID_Continue # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; ID_Continue # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; ID_Continue # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; ID_Continue # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; ID_Continue # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; ID_Continue # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; ID_Continue # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; ID_Continue # Lo MASARAM GONDI REPHA +11D47 ; ID_Continue # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; ID_Continue # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; ID_Continue # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; ID_Continue # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; ID_Continue # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; ID_Continue # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; ID_Continue # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI 
+11D93..11D94 ; ID_Continue # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; ID_Continue # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; ID_Continue # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; ID_Continue # Mn GUNJALA GONDI VIRAMA +11D98 ; ID_Continue # Lo GUNJALA GONDI OM +11DA0..11DA9 ; ID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; ID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; ID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; ID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; ID_Continue # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; ID_Continue # Lo KAWI SIGN REPHA +11F03 ; ID_Continue # Mc KAWI SIGN VISARGA +11F04..11F10 ; ID_Continue # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; ID_Continue # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; ID_Continue # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; ID_Continue # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; ID_Continue # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; ID_Continue # Mn KAWI VOWEL SIGN EU +11F41 ; ID_Continue # Mc KAWI SIGN KILLER +11F42 ; ID_Continue # Mn KAWI CONJOINER +11F50..11F59 ; ID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; ID_Continue # Mn KAWI SIGN NUKTA +11FB0 ; ID_Continue # Lo LISU LETTER YHA +12000..12399 ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; ID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; ID_Continue # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; ID_Continue # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13440 ; ID_Continue # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY 
+13441..13446 ; ID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455 ; ID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +13460..143FA ; ID_Continue # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; ID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; ID_Continue # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; ID_Continue # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; ID_Continue # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; ID_Continue # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; ID_Continue # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE +16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; ID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; ID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A70..16ABE ; ID_Continue # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; ID_Continue # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; ID_Continue # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; ID_Continue # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B00..16B2F ; ID_Continue # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; ID_Continue # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B40..16B43 ; ID_Continue # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B50..16B59 ; ID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B63..16B77 ; ID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; ID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN 
VWJ +16D40..16D42 ; ID_Continue # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; ID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; ID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D70..16D79 ; ID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16E40..16E7F ; ID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; ID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; ID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; ID_Continue # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; ID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; ID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; ID_Continue # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK +16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; ID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; ID_Continue # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; ID_Continue # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; ID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; 
ID_Continue # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; ID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; ID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; ID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; ID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; ID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; ID_Continue # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9D..1BC9E ; ID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CCF0..1CCF9 ; ID_Continue # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CF00..1CF2D ; ID_Continue # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; ID_Continue # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; ID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; ID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; ID_Continue # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; ID_Continue # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; ID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; ID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; ID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D400..1D454 ; ID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; ID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; ID_Continue # L& [2] 
MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; ID_Continue # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; ID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; ID_Continue # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; ID_Continue # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; ID_Continue # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; ID_Continue # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; ID_Continue # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; ID_Continue # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; ID_Continue # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; ID_Continue # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; ID_Continue # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; ID_Continue # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; ID_Continue # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; ID_Continue # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; ID_Continue # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; ID_Continue # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; ID_Continue # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; ID_Continue # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; ID_Continue # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; ID_Continue # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; 
ID_Continue # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; ID_Continue # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; ID_Continue # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; ID_Continue # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; ID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; ID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DA00..1DA36 ; ID_Continue # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; ID_Continue # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; ID_Continue # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; ID_Continue # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; ID_Continue # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; ID_Continue # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER 
ZHIVETE +1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; ID_Continue # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; ID_Continue # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; ID_Continue # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; ID_Continue # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E100..1E12C ; ID_Continue # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; ID_Continue # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; ID_Continue # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; ID_Continue # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; ID_Continue # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; ID_Continue # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; ID_Continue # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB ; ID_Continue # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; ID_Continue # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; ID_Continue # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4D0..1E4EA ; ID_Continue # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; ID_Continue # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; ID_Continue # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9 ; ID_Continue # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; ID_Continue # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5EE..1E5EF ; ID_Continue # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E5F0 ; ID_Continue # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; ID_Continue # Nd [10] OL 
ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E7E0..1E7E6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; ID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; ID_Continue # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; ID_Continue # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8D0..1E8D6 ; ID_Continue # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943 ; ID_Continue # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; ID_Continue # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; ID_Continue # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; ID_Continue # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1EE00..1EE03 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Continue # Lo 
[2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1FBF0..1FBF9 ; ID_Continue # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; ID_Continue # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; ID_Continue # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK 
UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 144541 + +# ================================================ + +# Derived Property: XID_Start +# ID_Start modified for closure under NFKx +# Modified as described in UAX #15 +# NOTE: Does NOT remove the non-NFKx characters. +# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) +# NOTE: See UAX #31 for more information + +0041..005A ; XID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; XID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; XID_Start # Lo FEMININE ORDINAL INDICATOR +00B5 ; XID_Start # L& MICRO SIGN +00BA ; XID_Start # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; XID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; XID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; XID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; XID_Start # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; XID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; XID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; XID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH 
WITH CURL +0294 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; XID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; XID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; XID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; XID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; XID_Start # Lm MODIFIER LETTER VOICING +02EE ; XID_Start # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; XID_Start # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; XID_Start # Lm GREEK NUMERAL SIGN +0376..0377 ; XID_Start # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; XID_Start # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; XID_Start # L& GREEK CAPITAL LETTER YOT +0386 ; XID_Start # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; XID_Start # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; XID_Start # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; XID_Start # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; XID_Start # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; XID_Start # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; XID_Start # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; XID_Start # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; XID_Start # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; XID_Start # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +05D0..05EA ; XID_Start # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; 
XID_Start # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0620..063F ; XID_Start # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; XID_Start # Lm ARABIC TATWEEL +0641..064A ; XID_Start # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; XID_Start # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; XID_Start # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; XID_Start # Lo ARABIC LETTER AE +06E5..06E6 ; XID_Start # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; XID_Start # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; XID_Start # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; XID_Start # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; XID_Start # Lo SYRIAC LETTER ALAPH +0712..072F ; XID_Start # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; XID_Start # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; XID_Start # Lo THAANA LETTER NAA +07CA..07EA ; XID_Start # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; XID_Start # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; XID_Start # Lm NKO LAJANYALAN +0800..0815 ; XID_Start # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; XID_Start # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; XID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; XID_Start # Lm SAMARITAN MODIFIER LETTER I +0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; XID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; XID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; XID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; XID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 
; XID_Start # Lm ARABIC SMALL FARSI YEH +0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; XID_Start # Lo DEVANAGARI SIGN AVAGRAHA +0950 ; XID_Start # Lo DEVANAGARI OM +0958..0961 ; XID_Start # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0971 ; XID_Start # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; XID_Start # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0985..098C ; XID_Start # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; XID_Start # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; XID_Start # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; XID_Start # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; XID_Start # Lo BENGALI LETTER LA +09B6..09B9 ; XID_Start # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; XID_Start # Lo BENGALI SIGN AVAGRAHA +09CE ; XID_Start # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; XID_Start # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; XID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; XID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; XID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +0A05..0A0A ; XID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; XID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; XID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; XID_Start # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; XID_Start # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; XID_Start # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; XID_Start # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A59..0A5C ; XID_Start # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; XID_Start # Lo GURMUKHI LETTER FA +0A72..0A74 ; XID_Start # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A85..0A8D ; XID_Start # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E 
+0A8F..0A91 ; XID_Start # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; XID_Start # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; XID_Start # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; XID_Start # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; XID_Start # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; XID_Start # Lo GUJARATI SIGN AVAGRAHA +0AD0 ; XID_Start # Lo GUJARATI OM +0AE0..0AE1 ; XID_Start # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF9 ; XID_Start # Lo GUJARATI LETTER ZHA +0B05..0B0C ; XID_Start # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; XID_Start # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; XID_Start # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; XID_Start # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; XID_Start # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; XID_Start # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; XID_Start # Lo ORIYA SIGN AVAGRAHA +0B5C..0B5D ; XID_Start # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; XID_Start # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; XID_Start # Lo ORIYA LETTER WA +0B83 ; XID_Start # Lo TAMIL SIGN VISARGA +0B85..0B8A ; XID_Start # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; XID_Start # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; XID_Start # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; XID_Start # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; XID_Start # Lo TAMIL LETTER JA +0B9E..0B9F ; XID_Start # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; XID_Start # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; XID_Start # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; XID_Start # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BD0 ; XID_Start # Lo TAMIL OM +0C05..0C0C ; XID_Start # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; XID_Start # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 
; XID_Start # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA +0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU +0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; XID_Start # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; XID_Start # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA +0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; XID_Start # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; XID_Start # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; XID_Start # Lo MALAYALAM SIGN AVAGRAHA +0D4E ; XID_Start # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; XID_Start # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D5F..0D61 ; XID_Start # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D7A..0D7F ; XID_Start # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D85..0D96 ; XID_Start # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; XID_Start # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; XID_Start # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; XID_Start # Lo SINHALA LETTER 
DANTAJA LAYANNA +0DC0..0DC6 ; XID_Start # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0E01..0E30 ; XID_Start # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32 ; XID_Start # Lo THAI CHARACTER SARA AA +0E40..0E45 ; XID_Start # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; XID_Start # Lm THAI CHARACTER MAIYAMOK +0E81..0E82 ; XID_Start # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; XID_Start # Lo LAO LETTER KHO TAM +0E86..0E8A ; XID_Start # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; XID_Start # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; XID_Start # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; XID_Start # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2 ; XID_Start # Lo LAO VOWEL SIGN AA +0EBD ; XID_Start # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; XID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; XID_Start # Lm LAO KO LA +0EDC..0EDF ; XID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; XID_Start # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; XID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; XID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F88..0F8C ; XID_Start # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +1000..102A ; XID_Start # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +103F ; XID_Start # Lo MYANMAR LETTER GREAT SA +1050..1055 ; XID_Start # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +105A..105D ; XID_Start # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; XID_Start # Lo MYANMAR LETTER SGAW KAREN SHA +1065..1066 ; XID_Start # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +106E..1070 ; XID_Start # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; XID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +108E ; XID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA +10A0..10C5 ; XID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN 
CAPITAL LETTER HOE +10C7 ; XID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Start # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; XID_Start # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; XID_Start # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; XID_Start # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; XID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; XID_Start # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; XID_Start # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; XID_Start # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; XID_Start # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; XID_Start # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; XID_Start # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; XID_Start # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; XID_Start # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; XID_Start # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; XID_Start # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; XID_Start # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; XID_Start # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; XID_Start # Lo [26] OGHAM LETTER 
BEITH..OGHAM LETTER PEITH +16A0..16EA ; XID_Start # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; XID_Start # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; XID_Start # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; XID_Start # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; XID_Start # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1740..1751 ; XID_Start # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; XID_Start # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; XID_Start # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; XID_Start # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17D7 ; XID_Start # Lm KHMER SIGN LEK TOO +17DC ; XID_Start # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; XID_Start # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; XID_Start # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; XID_Start # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; XID_Start # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; XID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; XID_Start # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; XID_Start # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; XID_Start # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; XID_Start # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1950..196D ; XID_Start # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; XID_Start # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; XID_Start # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; XID_Start # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; XID_Start # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A20..1A54 ; 
XID_Start # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1AA7 ; XID_Start # Lm TAI THAM SIGN MAI YAMOK +1B05..1B33 ; XID_Start # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B45..1B4C ; XID_Start # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B83..1BA0 ; XID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BAE..1BAF ; XID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; XID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1C00..1C23 ; XID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C8A ; XID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; XID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; XID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; XID_Start # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; XID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; XID_Start # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; XID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; XID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; XID_Start # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; XID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; XID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; 
XID_Start # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; XID_Start # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; XID_Start # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; XID_Start # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; XID_Start # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; XID_Start # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; XID_Start # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; XID_Start # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; XID_Start # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; XID_Start # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; XID_Start # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; XID_Start # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; XID_Start # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; XID_Start # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; XID_Start # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; XID_Start 
# L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; XID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; XID_Start # L& DOUBLE-STRUCK CAPITAL C +2107 ; XID_Start # L& EULER CONSTANT +210A..2113 ; XID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; XID_Start # L& DOUBLE-STRUCK CAPITAL N +2118 ; XID_Start # Sm SCRIPT CAPITAL P +2119..211D ; XID_Start # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; XID_Start # L& DOUBLE-STRUCK CAPITAL Z +2126 ; XID_Start # L& OHM SIGN +2128 ; XID_Start # L& BLACK-LETTER CAPITAL Z +212A..212D ; XID_Start # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; XID_Start # So ESTIMATED SYMBOL +212F..2134 ; XID_Start # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; XID_Start # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; XID_Start # L& INFORMATION SOURCE +213C..213F ; XID_Start # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; XID_Start # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; XID_Start # L& TURNED SMALL F +2160..2182 ; XID_Start # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; XID_Start # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; XID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; XID_Start # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; XID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; XID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; XID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC 
SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; XID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; XID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; XID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; XID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; XID_Start # Lm IDEOGRAPHIC ITERATION MARK +3006 ; XID_Start # Lo IDEOGRAPHIC CLOSING MARK +3007 ; XID_Start # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; XID_Start # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3031..3035 ; XID_Start # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; XID_Start # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; XID_Start # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; XID_Start # Lo MASU MARK +3041..3096 ; XID_Start # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; XID_Start # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; XID_Start # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; XID_Start # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; XID_Start # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; XID_Start # Lo KATAKANA DIGRAPH KOTO 
+3105..312F ; XID_Start # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; XID_Start # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; XID_Start # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; XID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; XID_Start # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; XID_Start # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; XID_Start # Lm YI SYLLABLE WU +A016..A48C ; XID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; XID_Start # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; XID_Start # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; XID_Start # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; XID_Start # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; XID_Start # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; XID_Start # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; XID_Start # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; XID_Start # Lo CYRILLIC LETTER MULTIOCULAR O +A67F ; XID_Start # Lm CYRILLIC PAYEROK +A680..A69B ; XID_Start # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; XID_Start # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; XID_Start # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; XID_Start # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A717..A71F ; XID_Start # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; XID_Start # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; XID_Start # Lm MODIFIER LETTER US +A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER 
SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; XID_Start # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; XID_Start # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; XID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; XID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; XID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; XID_Start # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; XID_Start # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A840..A873 ; XID_Start # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A882..A8B3 ; XID_Start # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8F2..A8F7 ; XID_Start # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; XID_Start # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; XID_Start # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A90A..A925 ; XID_Start # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A930..A946 ; XID_Start # Lo [23] REJANG LETTER KA..REJANG LETTER A +A960..A97C ; XID_Start # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A984..A9B2 ; XID_Start # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9CF ; XID_Start # Lm JAVANESE PANGRANGKEP +A9E0..A9E4 ; 
XID_Start # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; XID_Start # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; XID_Start # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; XID_Start # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; XID_Start # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA40..AA42 ; XID_Start # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; XID_Start # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA60..AA6F ; XID_Start # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; XID_Start # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; XID_Start # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; XID_Start # Lo MYANMAR LETTER AITON RA +AA7E..AAAF ; XID_Start # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; XID_Start # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; XID_Start # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; XID_Start # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; XID_Start # Lo TAI VIET TONE MAI NUENG +AAC2 ; XID_Start # Lo TAI VIET TONE MAI SONG +AADB..AADC ; XID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; XID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; XID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AB01..AB06 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; XID_Start # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; XID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN 
SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; XID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; XID_Start # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; XID_Start # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; XID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; XID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +AC00..D7A3 ; XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; XID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; XID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; XID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; XID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; XID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; XID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; XID_Start # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; XID_Start # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; XID_Start # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; XID_Start # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; XID_Start # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; XID_Start # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; XID_Start # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; XID_Start # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FC5D ; XID_Start # Lo [139] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM +FC64..FD3D ; XID_Start # Lo [218] ARABIC 
LIGATURE YEH WITH HAMZA ABOVE WITH REH FINAL FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; XID_Start # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; XID_Start # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDF9 ; XID_Start # Lo [10] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE SALLA ISOLATED FORM +FE71 ; XID_Start # Lo ARABIC TATWEEL WITH FATHATAN ABOVE +FE73 ; XID_Start # Lo ARABIC TAIL FRAGMENT +FE77 ; XID_Start # Lo ARABIC FATHA MEDIAL FORM +FE79 ; XID_Start # Lo ARABIC DAMMA MEDIAL FORM +FE7B ; XID_Start # Lo ARABIC KASRA MEDIAL FORM +FE7D ; XID_Start # Lo ARABIC SHADDA MEDIAL FORM +FE7F..FEFC ; XID_Start # Lo [126] ARABIC SUKUN MEDIAL FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; XID_Start # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; XID_Start # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; XID_Start # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; XID_Start # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; XID_Start # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FFA0..FFBE ; XID_Start # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; XID_Start # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; XID_Start # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO 
+10028..1003A ; XID_Start # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; XID_Start # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; XID_Start # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; XID_Start # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; XID_Start # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; XID_Start # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; XID_Start # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; XID_Start # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; XID_Start # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; XID_Start # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; XID_Start # Nl GOTHIC LETTER NINETY +10342..10349 ; XID_Start # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; XID_Start # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; XID_Start # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; XID_Start # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; XID_Start # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; XID_Start # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; XID_Start # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; XID_Start # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; XID_Start # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; XID_Start # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; XID_Start # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; XID_Start # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; XID_Start # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; XID_Start # L& [11] VITHKUQI CAPITAL LETTER 
A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; XID_Start # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; XID_Start # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; XID_Start # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; XID_Start # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; XID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; XID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; XID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; XID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; XID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; XID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; XID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; XID_Start # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; XID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; XID_Start # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; XID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; XID_Start # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; XID_Start # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; XID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; XID_Start # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; XID_Start # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; XID_Start # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; XID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; 
XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; XID_Start # Lo KHAROSHTHI LETTER A +10A10..10A13 ; XID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; XID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; XID_Start # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; XID_Start # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; XID_Start # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; XID_Start # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; XID_Start # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; XID_Start # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; XID_Start # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; XID_Start # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; XID_Start # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; XID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; XID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; XID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; XID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; XID_Start # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; XID_Start # Lm GARAY VOWEL LENGTH MARK +10D4F ; XID_Start # Lo GARAY SUKUN +10D50..10D65 ; XID_Start # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; XID_Start # Lm GARAY 
REDUPLICATION MARK +10D70..10D85 ; XID_Start # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; XID_Start # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; XID_Start # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; XID_Start # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11003..11037 ; XID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; XID_Start # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; XID_Start # Lo BRAHMI LETTER OLD TAMIL LLA +11083..110AF ; XID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; XID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; XID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11144 ; XID_Start # Lo CHAKMA LETTER LHAA +11147 ; XID_Start # Lo CHAKMA LETTER VAA +11150..11172 ; XID_Start # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; XID_Start # Lo MAHAJANI LIGATURE SHRI +11183..111B2 ; XID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; XID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111DA ; XID_Start # Lo SHARADA EKAM +111DC ; XID_Start # Lo SHARADA HEADSTROKE +11200..11211 ; XID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; XID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; XID_Start # Lo [2] 
KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11280..11286 ; XID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; XID_Start # Lo MULTANI LETTER GHA +1128A..1128D ; XID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; XID_Start # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; XID_Start # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; XID_Start # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +11305..1130C ; XID_Start # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; XID_Start # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; XID_Start # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; XID_Start # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; XID_Start # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; XID_Start # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; XID_Start # Lo GRANTHA SIGN AVAGRAHA +11350 ; XID_Start # Lo GRANTHA OM +1135D..11361 ; XID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; XID_Start # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; XID_Start # Lo TULU-TIGALARI LETTER EE +1138E ; XID_Start # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; XID_Start # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; XID_Start # Lo TULU-TIGALARI SIGN AVAGRAHA +113D1 ; XID_Start # Lo TULU-TIGALARI REPHA +113D3 ; XID_Start # Lo TULU-TIGALARI SIGN PLUTA +11400..11434 ; XID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; XID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; XID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; XID_Start # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114C4..114C5 ; XID_Start # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; XID_Start # Lo TIRHUTA OM +11580..115AE ; XID_Start # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115D8..115DB ; XID_Start # Lo [4] SIDDHAM LETTER 
THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; XID_Start # Lo [48] MODI LETTER A..MODI LETTER LLA +11644 ; XID_Start # Lo MODI SIGN HUVA +11680..116AA ; XID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116B8 ; XID_Start # Lo TAKRI LETTER ARCHAIC KHA +11700..1171A ; XID_Start # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11740..11746 ; XID_Start # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; XID_Start # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +118A0..118DF ; XID_Start # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; XID_Start # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; XID_Start # Lo DIVES AKURU LETTER O +1190C..11913 ; XID_Start # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; XID_Start # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; XID_Start # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +1193F ; XID_Start # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; XID_Start # Lo DIVES AKURU INITIAL RA +119A0..119A7 ; XID_Start # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; XID_Start # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119E1 ; XID_Start # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; XID_Start # Lo NANDINAGARI HEADSTROKE +11A00 ; XID_Start # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; XID_Start # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; XID_Start # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; XID_Start # Lo SOYOMBO LETTER A +11A5C..11A89 ; XID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A9D ; XID_Start # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; XID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; XID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11C00..11C08 ; XID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; XID_Start # Lo [37] 
BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; XID_Start # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; XID_Start # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; XID_Start # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; XID_Start # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; XID_Start # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; XID_Start # Lo MASARAM GONDI REPHA +11D60..11D65 ; XID_Start # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; XID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; XID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D98 ; XID_Start # Lo GUNJALA GONDI OM +11EE0..11EF2 ; XID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; XID_Start # Lo KAWI SIGN REPHA +11F04..11F10 ; XID_Start # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; XID_Start # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11FB0 ; XID_Start # Lo LISU LETTER YHA +12000..12399 ; XID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; XID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; XID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; XID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; XID_Start # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; XID_Start # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; XID_Start # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; XID_Start # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; XID_Start # Lo 
[31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; XID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; XID_Start # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; XID_Start # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; XID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; XID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; XID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; XID_Start # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; XID_Start # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; XID_Start # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16E40..16E7F ; XID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; XID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK +17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; XID_Start # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; XID_Start # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; XID_Start # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; XID_Start # Lo [3] HIRAGANA LETTER 
SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; XID_Start # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; XID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; XID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; XID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; XID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; XID_Start # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; XID_Start # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1D400..1D454 ; XID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; XID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; XID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; XID_Start # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; XID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; XID_Start # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; XID_Start # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; XID_Start # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; XID_Start # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; XID_Start # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; XID_Start # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; XID_Start # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; XID_Start # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; XID_Start # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; XID_Start # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G 
+1D540..1D544 ; XID_Start # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; XID_Start # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; XID_Start # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; XID_Start # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; XID_Start # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; XID_Start # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; XID_Start # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; XID_Start # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; XID_Start # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; XID_Start # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; XID_Start # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; XID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; XID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL 
LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; XID_Start # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; XID_Start # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; XID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; XID_Start # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; XID_Start # Lm NAG MUNDARI SIGN OJOD +1E5D0..1E5ED ; XID_Start # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; XID_Start # Lo OL ONAL SIGN HODDOND +1E7E0..1E7E6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; XID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; XID_Start # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; XID_Start # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; XID_Start # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; XID_Start # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; XID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Start # Lo [4] 
ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK 
WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +20000..2A6DF ; XID_Start # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; XID_Start # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 141246 + +# ================================================ + +# Derived Property: XID_Continue +# Mod_ID_Continue modified for closure under NFKx +# Modified as described in UAX #15 +# NOTE: Does NOT remove the non-NFKx characters. 
+# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) +# NOTE: See UAX #31 for more information + +0030..0039 ; XID_Continue # Nd [10] DIGIT ZERO..DIGIT NINE +0041..005A ; XID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005F ; XID_Continue # Pc LOW LINE +0061..007A ; XID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; XID_Continue # Lo FEMININE ORDINAL INDICATOR +00B5 ; XID_Continue # L& MICRO SIGN +00B7 ; XID_Continue # Po MIDDLE DOT +00BA ; XID_Continue # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; XID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; XID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; XID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; XID_Continue # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; XID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; XID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; XID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; XID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; XID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; XID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; XID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; XID_Continue # Lm MODIFIER LETTER VOICING +02EE ; XID_Continue # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0300..036F ; XID_Continue # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0370..0373 ; XID_Continue # L& [4] GREEK CAPITAL LETTER HETA..GREEK 
SMALL LETTER ARCHAIC SAMPI +0374 ; XID_Continue # Lm GREEK NUMERAL SIGN +0376..0377 ; XID_Continue # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; XID_Continue # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; XID_Continue # L& GREEK CAPITAL LETTER YOT +0386 ; XID_Continue # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; XID_Continue # Po GREEK ANO TELEIA +0388..038A ; XID_Continue # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; XID_Continue # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; XID_Continue # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; XID_Continue # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; XID_Continue # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0483..0487 ; XID_Continue # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +048A..052F ; XID_Continue # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; XID_Continue # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; XID_Continue # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; XID_Continue # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0591..05BD ; XID_Continue # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; XID_Continue # Mn HEBREW POINT RAFE +05C1..05C2 ; XID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; XID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; XID_Continue # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; XID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; XID_Continue # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0610..061A ; XID_Continue # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE 
WASSALLAM..ARABIC SMALL KASRA +0620..063F ; XID_Continue # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; XID_Continue # Lm ARABIC TATWEEL +0641..064A ; XID_Continue # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..065F ; XID_Continue # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0660..0669 ; XID_Continue # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066E..066F ; XID_Continue # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; XID_Continue # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; XID_Continue # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; XID_Continue # Lo ARABIC LETTER AE +06D6..06DC ; XID_Continue # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; XID_Continue # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; XID_Continue # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; XID_Continue # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; XID_Continue # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; XID_Continue # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; XID_Continue # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; XID_Continue # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; XID_Continue # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; XID_Continue # Lo SYRIAC LETTER ALAPH +0711 ; XID_Continue # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; XID_Continue # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; XID_Continue # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..07A5 ; XID_Continue # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07A6..07B0 ; XID_Continue # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; XID_Continue # Lo THAANA LETTER 
NAA +07C0..07C9 ; XID_Continue # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; XID_Continue # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; XID_Continue # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; XID_Continue # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; XID_Continue # Lm NKO LAJANYALAN +07FD ; XID_Continue # Mn NKO DANTAYALAN +0800..0815 ; XID_Continue # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; XID_Continue # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; XID_Continue # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; XID_Continue # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; XID_Continue # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; XID_Continue # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; XID_Continue # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; XID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0840..0858 ; XID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; XID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; XID_Continue # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +0903 ; XID_Continue # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; XID_Continue # Lo [54] DEVANAGARI LETTER SHORT 
A..DEVANAGARI LETTER HA +093A ; XID_Continue # Mn DEVANAGARI VOWEL SIGN OE +093B ; XID_Continue # Mc DEVANAGARI VOWEL SIGN OOE +093C ; XID_Continue # Mn DEVANAGARI SIGN NUKTA +093D ; XID_Continue # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; XID_Continue # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; XID_Continue # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; XID_Continue # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; XID_Continue # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; XID_Continue # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; XID_Continue # Lo DEVANAGARI OM +0951..0957 ; XID_Continue # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; XID_Continue # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; XID_Continue # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; XID_Continue # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0971 ; XID_Continue # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; XID_Continue # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0981 ; XID_Continue # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; XID_Continue # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; XID_Continue # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; XID_Continue # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; XID_Continue # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; XID_Continue # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; XID_Continue # Lo BENGALI LETTER LA +09B6..09B9 ; XID_Continue # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; XID_Continue # Mn BENGALI SIGN NUKTA +09BD ; XID_Continue # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; XID_Continue # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; XID_Continue # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR 
+09C7..09C8 ; XID_Continue # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; XID_Continue # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; XID_Continue # Mn BENGALI SIGN VIRAMA +09CE ; XID_Continue # Lo BENGALI LETTER KHANDA TA +09D7 ; XID_Continue # Mc BENGALI AU LENGTH MARK +09DC..09DD ; XID_Continue # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; XID_Continue # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; XID_Continue # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; XID_Continue # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; XID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; XID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA +09FE ; XID_Continue # Mn BENGALI SANDHI MARK +0A01..0A02 ; XID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; XID_Continue # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; XID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; XID_Continue # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; XID_Continue # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; XID_Continue # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; XID_Continue # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; XID_Continue # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; XID_Continue # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; XID_Continue # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; XID_Continue # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; XID_Continue # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; XID_Continue # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; XID_Continue # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; XID_Continue # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; XID_Continue # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI 
LETTER RRA +0A5E ; XID_Continue # Lo GURMUKHI LETTER FA +0A66..0A6F ; XID_Continue # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; XID_Continue # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; XID_Continue # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; XID_Continue # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; XID_Continue # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; XID_Continue # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; XID_Continue # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; XID_Continue # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; XID_Continue # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; XID_Continue # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; XID_Continue # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; XID_Continue # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; XID_Continue # Mn GUJARATI SIGN NUKTA +0ABD ; XID_Continue # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; XID_Continue # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; XID_Continue # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; XID_Continue # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; XID_Continue # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; XID_Continue # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; XID_Continue # Mn GUJARATI SIGN VIRAMA +0AD0 ; XID_Continue # Lo GUJARATI OM +0AE0..0AE1 ; XID_Continue # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; XID_Continue # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; XID_Continue # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF9 ; XID_Continue # Lo GUJARATI LETTER ZHA +0AFA..0AFF ; XID_Continue # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; XID_Continue # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; XID_Continue # Mc [2] ORIYA SIGN ANUSVARA..ORIYA 
SIGN VISARGA +0B05..0B0C ; XID_Continue # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; XID_Continue # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; XID_Continue # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; XID_Continue # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; XID_Continue # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; XID_Continue # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; XID_Continue # Mn ORIYA SIGN NUKTA +0B3D ; XID_Continue # Lo ORIYA SIGN AVAGRAHA +0B3E ; XID_Continue # Mc ORIYA VOWEL SIGN AA +0B3F ; XID_Continue # Mn ORIYA VOWEL SIGN I +0B40 ; XID_Continue # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; XID_Continue # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; XID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; XID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; XID_Continue # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; XID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; XID_Continue # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; XID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; XID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; XID_Continue # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; XID_Continue # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B71 ; XID_Continue # Lo ORIYA LETTER WA +0B82 ; XID_Continue # Mn TAMIL SIGN ANUSVARA +0B83 ; XID_Continue # Lo TAMIL SIGN VISARGA +0B85..0B8A ; XID_Continue # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; XID_Continue # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; XID_Continue # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; XID_Continue # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; XID_Continue # Lo TAMIL LETTER JA +0B9E..0B9F ; XID_Continue # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; XID_Continue # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; XID_Continue # Lo 
[3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; XID_Continue # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; XID_Continue # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; XID_Continue # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; XID_Continue # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; XID_Continue # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; XID_Continue # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; XID_Continue # Mn TAMIL SIGN VIRAMA +0BD0 ; XID_Continue # Lo TAMIL OM +0BD7 ; XID_Continue # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; XID_Continue # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C00 ; XID_Continue # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; XID_Continue # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; XID_Continue # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; XID_Continue # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; XID_Continue # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; XID_Continue # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; XID_Continue # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C ; XID_Continue # Mn TELUGU SIGN NUKTA +0C3D ; XID_Continue # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; XID_Continue # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; XID_Continue # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; XID_Continue # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; XID_Continue # Nd [10] TELUGU 
DIGIT ZERO..TELUGU DIGIT NINE +0C80 ; XID_Continue # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; XID_Continue # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; XID_Continue # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; XID_Continue # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; XID_Continue # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; XID_Continue # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; XID_Continue # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; XID_Continue # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; XID_Continue # Mn KANNADA SIGN NUKTA +0CBD ; XID_Continue # Lo KANNADA SIGN AVAGRAHA +0CBE ; XID_Continue # Mc KANNADA VOWEL SIGN AA +0CBF ; XID_Continue # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; XID_Continue # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; XID_Continue # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; XID_Continue # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; XID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; XID_Continue # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01 ; XID_Continue # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; XID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; XID_Continue # Lo [9] MALAYALAM LETTER VEDIC 
ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; XID_Continue # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; XID_Continue # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C ; XID_Continue # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D ; XID_Continue # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; XID_Continue # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; XID_Continue # Mn MALAYALAM SIGN VIRAMA +0D4E ; XID_Continue # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; XID_Continue # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; XID_Continue # Mc MALAYALAM AU LENGTH MARK +0D5F..0D61 ; XID_Continue # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; XID_Continue # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; XID_Continue # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D7A..0D7F ; XID_Continue # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; XID_Continue # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; XID_Continue # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; XID_Continue # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; XID_Continue # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; XID_Continue # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; XID_Continue # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; XID_Continue # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; XID_Continue # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; XID_Continue # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA 
+0DD2..0DD4 ; XID_Continue # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; XID_Continue # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; XID_Continue # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; XID_Continue # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; XID_Continue # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E01..0E30 ; XID_Continue # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; XID_Continue # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; XID_Continue # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; XID_Continue # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; XID_Continue # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; XID_Continue # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; XID_Continue # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E50..0E59 ; XID_Continue # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E81..0E82 ; XID_Continue # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; XID_Continue # Lo LAO LETTER KHO TAM +0E86..0E8A ; XID_Continue # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; XID_Continue # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; XID_Continue # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; XID_Continue # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; XID_Continue # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; XID_Continue # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC ; XID_Continue # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD ; XID_Continue # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; XID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; XID_Continue # Lm LAO KO LA +0EC8..0ECE ; XID_Continue # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 
; XID_Continue # Lo TIBETAN SYLLABLE OM +0F18..0F19 ; XID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F20..0F29 ; XID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F35 ; XID_Continue # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; XID_Continue # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; XID_Continue # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; XID_Continue # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; XID_Continue # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; XID_Continue # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; XID_Continue # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; XID_Continue # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; XID_Continue # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; XID_Continue # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C ; XID_Continue # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; XID_Continue # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; XID_Continue # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; XID_Continue # Mn TIBETAN SYMBOL PADMA GDAN +1000..102A ; XID_Continue # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; XID_Continue # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; XID_Continue # Mc MYANMAR VOWEL SIGN E +1032..1037 ; XID_Continue # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; XID_Continue # Mc MYANMAR SIGN VISARGA +1039..103A ; XID_Continue # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; XID_Continue # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; XID_Continue # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; 
XID_Continue # Lo MYANMAR LETTER GREAT SA +1040..1049 ; XID_Continue # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +1050..1055 ; XID_Continue # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; XID_Continue # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; XID_Continue # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; XID_Continue # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; XID_Continue # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; XID_Continue # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; XID_Continue # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; XID_Continue # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; XID_Continue # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; XID_Continue # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; XID_Continue # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; XID_Continue # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; XID_Continue # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; XID_Continue # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; XID_Continue # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; XID_Continue # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; XID_Continue # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; XID_Continue # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; XID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; XID_Continue 
# Mn MYANMAR VOWEL SIGN AITON AI +10A0..10C5 ; XID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Continue # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; XID_Continue # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; XID_Continue # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; XID_Continue # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; XID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; XID_Continue # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; XID_Continue # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; XID_Continue # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; XID_Continue # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; XID_Continue # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; XID_Continue # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; XID_Continue # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F ; XID_Continue # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1369..1371 ; XID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +1380..138F ; XID_Continue # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 
+13A0..13F5 ; XID_Continue # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; XID_Continue # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; XID_Continue # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; XID_Continue # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; XID_Continue # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; XID_Continue # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; XID_Continue # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; XID_Continue # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; XID_Continue # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714 ; XID_Continue # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; XID_Continue # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; XID_Continue # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1732..1733 ; XID_Continue # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; XID_Continue # Mc HANUNOO SIGN PAMUDPOD +1740..1751 ; XID_Continue # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; XID_Continue # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; XID_Continue # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; XID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; XID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; XID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; XID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; XID_Continue # Mc KHMER VOWEL SIGN AA +17B7..17BD ; XID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; XID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; XID_Continue # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; XID_Continue # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; XID_Continue # Mn [11] KHMER SIGN 
MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D7 ; XID_Continue # Lm KHMER SIGN LEK TOO +17DC ; XID_Continue # Lo KHMER SIGN AVAKRAHASANYA +17DD ; XID_Continue # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; XID_Continue # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +180B..180D ; XID_Continue # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; XID_Continue # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819 ; XID_Continue # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; XID_Continue # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; XID_Continue # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; XID_Continue # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; XID_Continue # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; XID_Continue # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; XID_Continue # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; XID_Continue # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; XID_Continue # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; XID_Continue # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; XID_Continue # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; XID_Continue # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; XID_Continue # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; XID_Continue # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; XID_Continue # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; XID_Continue # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; XID_Continue # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; XID_Continue # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; XID_Continue # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1946..194F 
; XID_Continue # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; XID_Continue # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; XID_Continue # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; XID_Continue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; XID_Continue # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; XID_Continue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; XID_Continue # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; XID_Continue # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; XID_Continue # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; XID_Continue # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; XID_Continue # Mn BUGINESE VOWEL SIGN AE +1A20..1A54 ; XID_Continue # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; XID_Continue # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; XID_Continue # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; XID_Continue # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; XID_Continue # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; XID_Continue # Mn TAI THAM SIGN SAKOT +1A61 ; XID_Continue # Mc TAI THAM VOWEL SIGN A +1A62 ; XID_Continue # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; XID_Continue # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; XID_Continue # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; XID_Continue # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; XID_Continue # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; XID_Continue # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; XID_Continue # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK +1AB0..1ABD ; 
XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; XID_Continue # Mc BALINESE SIGN BISAH +1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; XID_Continue # Mn BALINESE SIGN REREKAN +1B35 ; XID_Continue # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; XID_Continue # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; XID_Continue # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; XID_Continue # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; XID_Continue # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; XID_Continue # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; XID_Continue # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; XID_Continue # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; XID_Continue # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B6B..1B73 ; XID_Continue # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; XID_Continue # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; XID_Continue # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; XID_Continue # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; XID_Continue # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; XID_Continue # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; XID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; XID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; XID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; XID_Continue # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; XID_Continue # 
Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; XID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; XID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE6 ; XID_Continue # Mn BATAK SIGN TOMPI +1BE7 ; XID_Continue # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; XID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; XID_Continue # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; XID_Continue # Mn BATAK VOWEL SIGN KARO O +1BEE ; XID_Continue # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; XID_Continue # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; XID_Continue # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C00..1C23 ; XID_Continue # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; XID_Continue # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; XID_Continue # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; XID_Continue # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; XID_Continue # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C40..1C49 ; XID_Continue # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; XID_Continue # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; XID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; XID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; XID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C8A ; XID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; XID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; XID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CD0..1CD2 ; XID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; XID_Continue # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI 
INDEPENDENT SVARITA +1CE1 ; XID_Continue # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; XID_Continue # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CE9..1CEC ; XID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CED ; XID_Continue # Mn VEDIC SIGN TIRYAK +1CEE..1CF3 ; XID_Continue # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; XID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; XID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; XID_Continue # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; XID_Continue # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1CFA ; XID_Continue # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; XID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; XID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; XID_Continue # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; XID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; XID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; XID_Continue # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1E00..1F15 ; XID_Continue # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; XID_Continue # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; XID_Continue # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; XID_Continue # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; XID_Continue # L& [8] GREEK SMALL 
LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; XID_Continue # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; XID_Continue # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; XID_Continue # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; XID_Continue # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; XID_Continue # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; XID_Continue # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; XID_Continue # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; XID_Continue # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200C..200D ; XID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE +2054 ; XID_Continue # Pc INVERTED UNDERTIE +2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; XID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN 
SUBSCRIPT SMALL LETTER T +20D0..20DC ; XID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; XID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20F0 ; XID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2102 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL C +2107 ; XID_Continue # L& EULER CONSTANT +210A..2113 ; XID_Continue # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL N +2118 ; XID_Continue # Sm SCRIPT CAPITAL P +2119..211D ; XID_Continue # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL Z +2126 ; XID_Continue # L& OHM SIGN +2128 ; XID_Continue # L& BLACK-LETTER CAPITAL Z +212A..212D ; XID_Continue # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; XID_Continue # So ESTIMATED SYMBOL +212F..2134 ; XID_Continue # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; XID_Continue # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; XID_Continue # L& INFORMATION SOURCE +213C..213F ; XID_Continue # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; XID_Continue # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; XID_Continue # L& TURNED SMALL F +2160..2182 ; XID_Continue # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; XID_Continue # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; XID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; XID_Continue # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; XID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; XID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; XID_Continue # Mn [3] COPTIC 
COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; XID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; XID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; XID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; XID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D7F ; XID_Continue # Mn TIFINAGH CONSONANT JOINER +2D80..2D96 ; XID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; XID_Continue # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +3005 ; XID_Continue # Lm IDEOGRAPHIC ITERATION MARK +3006 ; XID_Continue # Lo IDEOGRAPHIC CLOSING MARK +3007 ; XID_Continue # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; XID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D ; XID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; XID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; XID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; XID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 
+303B ; XID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; XID_Continue # Lo MASU MARK +3041..3096 ; XID_Continue # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A ; XID_Continue # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; XID_Continue # Po KATAKANA MIDDLE DOT +30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO +3105..312F ; XID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; XID_Continue # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; XID_Continue # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; XID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; XID_Continue # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; XID_Continue # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; XID_Continue # Lm YI SYLLABLE WU +A016..A48C ; XID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; XID_Continue # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; XID_Continue # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; XID_Continue # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; XID_Continue # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; XID_Continue # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; XID_Continue # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; XID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; XID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; XID_Continue # Lo CYRILLIC 
LETTER MULTIOCULAR O +A66F ; XID_Continue # Mn COMBINING CYRILLIC VZMET +A674..A67D ; XID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67F ; XID_Continue # Lm CYRILLIC PAYEROK +A680..A69B ; XID_Continue # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; XID_Continue # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; XID_Continue # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; XID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; XID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; XID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; XID_Continue # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; XID_Continue # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; XID_Continue # Lm MODIFIER LETTER US +A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; XID_Continue # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; XID_Continue # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER 
SIDEWAYS I +A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; XID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; XID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A802 ; XID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; XID_Continue # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; XID_Continue # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; XID_Continue # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; XID_Continue # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; XID_Continue # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; XID_Continue # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; XID_Continue # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; XID_Continue # Mc SYLOTI NAGRI VOWEL SIGN OO +A82C ; XID_Continue # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A840..A873 ; XID_Continue # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; XID_Continue # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; XID_Continue # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; XID_Continue # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; XID_Continue # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8D0..A8D9 ; XID_Continue # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1 ; XID_Continue # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; XID_Continue # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; XID_Continue # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; XID_Continue # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; XID_Continue # Mn DEVANAGARI VOWEL SIGN AY +A900..A909 ; XID_Continue # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; XID_Continue # Lo [28] 
KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; XID_Continue # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A930..A946 ; XID_Continue # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; XID_Continue # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; XID_Continue # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A960..A97C ; XID_Continue # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; XID_Continue # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; XID_Continue # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; XID_Continue # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; XID_Continue # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; XID_Continue # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; XID_Continue # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; XID_Continue # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; XID_Continue # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; XID_Continue # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9CF ; XID_Continue # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; XID_Continue # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9E0..A9E4 ; XID_Continue # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; XID_Continue # Mn MYANMAR SIGN SHAN SAW +A9E6 ; XID_Continue # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; XID_Continue # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; XID_Continue # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; XID_Continue # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; XID_Continue # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; XID_Continue # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; XID_Continue # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI 
+AA31..AA32 ; XID_Continue # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; XID_Continue # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; XID_Continue # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; XID_Continue # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; XID_Continue # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; XID_Continue # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; XID_Continue # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; XID_Continue # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; XID_Continue # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA60..AA6F ; XID_Continue # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; XID_Continue # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; XID_Continue # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; XID_Continue # Lo MYANMAR LETTER AITON RA +AA7B ; XID_Continue # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; XID_Continue # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; XID_Continue # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; XID_Continue # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB0 ; XID_Continue # Mn TAI VIET MAI KANG +AAB1 ; XID_Continue # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; XID_Continue # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; XID_Continue # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; XID_Continue # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; XID_Continue # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; XID_Continue # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; XID_Continue # Lo TAI VIET TONE MAI NUENG +AAC1 ; XID_Continue # Mn TAI VIET TONE MAI THO +AAC2 ; XID_Continue # Lo TAI VIET TONE MAI SONG +AADB..AADC ; XID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; XID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA 
+AAEB ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; XID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; XID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; XID_Continue # Mn MEETEI MAYEK VIRAMA +AB01..AB06 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; XID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; XID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; XID_Continue # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; XID_Continue # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; XID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; XID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; XID_Continue # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; XID_Continue # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; XID_Continue # Mc MEETEI MAYEK LUM IYEK +ABED ; XID_Continue # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; XID_Continue # Nd 
[10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; XID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; XID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; XID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; XID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; XID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; XID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; XID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; XID_Continue # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; XID_Continue # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; XID_Continue # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; XID_Continue # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; XID_Continue # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; XID_Continue # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; XID_Continue # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; XID_Continue # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; XID_Continue # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FC5D ; XID_Continue # Lo [139] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM +FC64..FD3D ; XID_Continue # Lo [218] ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH REH FINAL FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; XID_Continue # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; XID_Continue # Lo [54] ARABIC LIGATURE MEEM 
WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDF9 ; XID_Continue # Lo [10] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE SALLA ISOLATED FORM +FE00..FE0F ; XID_Continue # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; XID_Continue # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE33..FE34 ; XID_Continue # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; XID_Continue # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE71 ; XID_Continue # Lo ARABIC TATWEEL WITH FATHATAN ABOVE +FE73 ; XID_Continue # Lo ARABIC TAIL FRAGMENT +FE77 ; XID_Continue # Lo ARABIC FATHA MEDIAL FORM +FE79 ; XID_Continue # Lo ARABIC DAMMA MEDIAL FORM +FE7B ; XID_Continue # Lo ARABIC KASRA MEDIAL FORM +FE7D ; XID_Continue # Lo ARABIC SHADDA MEDIAL FORM +FE7F..FEFC ; XID_Continue # Lo [126] ARABIC SUKUN MEDIAL FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF10..FF19 ; XID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE +FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF65 ; XID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; XID_Continue # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; XID_Continue # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; XID_Continue # Lo 
[6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; XID_Continue # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; XID_Continue # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; XID_Continue # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; XID_Continue # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; XID_Continue # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; XID_Continue # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; XID_Continue # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; XID_Continue # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +101FD ; XID_Continue # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +10280..1029C ; XID_Continue # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; XID_Continue # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0 ; XID_Continue # Mn COPTIC EPACT THOUSANDS MARK +10300..1031F ; XID_Continue # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; XID_Continue # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; XID_Continue # Nl GOTHIC LETTER NINETY +10342..10349 ; XID_Continue # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; XID_Continue # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; XID_Continue # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; XID_Continue # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; XID_Continue # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; XID_Continue # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; XID_Continue # Lo [8] OLD 
PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; XID_Continue # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; XID_Continue # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; XID_Continue # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; XID_Continue # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; XID_Continue # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; XID_Continue # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; XID_Continue # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; XID_Continue # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; XID_Continue # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; XID_Continue # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; XID_Continue # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; XID_Continue # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; XID_Continue # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; XID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; XID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; XID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; XID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; XID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; XID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; XID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 
+107B2..107BA ; XID_Continue # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; XID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; XID_Continue # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; XID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; XID_Continue # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; XID_Continue # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; XID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; XID_Continue # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; XID_Continue # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; XID_Continue # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; XID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A +10A01..10A03 ; XID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; XID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; XID_Continue # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; XID_Continue # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; XID_Continue # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; XID_Continue # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; XID_Continue # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; XID_Continue # Mn KHAROSHTHI VIRAMA +10A60..10A7C ; XID_Continue # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN 
LETTER THETH +10A80..10A9C ; XID_Continue # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; XID_Continue # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; XID_Continue # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; XID_Continue # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10B00..10B35 ; XID_Continue # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; XID_Continue # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; XID_Continue # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; XID_Continue # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; XID_Continue # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; XID_Continue # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; XID_Continue # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; XID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; XID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; XID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; XID_Continue # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; XID_Continue # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; XID_Continue # Lm GARAY VOWEL LENGTH MARK +10D4F ; XID_Continue # Lo GARAY SUKUN +10D50..10D65 ; XID_Continue # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; XID_Continue # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; XID_Continue # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; XID_Continue # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10E80..10EA9 ; XID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI 
LETTER ET +10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; XID_Continue # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F70..10F81 ; XID_Continue # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85 ; XID_Continue # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10FB0..10FC4 ; XID_Continue # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; XID_Continue # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; XID_Continue # Mc BRAHMI SIGN CANDRABINDU +11001 ; XID_Continue # Mn BRAHMI SIGN ANUSVARA +11002 ; XID_Continue # Mc BRAHMI SIGN VISARGA +11003..11037 ; XID_Continue # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; XID_Continue # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11066..1106F ; XID_Continue # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; XID_Continue # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; XID_Continue # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; XID_Continue # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; XID_Continue # Lo BRAHMI LETTER OLD TAMIL LLA +1107F..11081 ; XID_Continue # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +11082 ; XID_Continue # Mc 
KAITHI SIGN VISARGA +11083..110AF ; XID_Continue # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; XID_Continue # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; XID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; XID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; XID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; XID_Continue # Mn KAITHI VOWEL SIGN VOCALIC R +110D0..110E8 ; XID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; XID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; XID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; XID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; XID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; XID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; XID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; XID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11144 ; XID_Continue # Lo CHAKMA LETTER LHAA +11145..11146 ; XID_Continue # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; XID_Continue # Lo CHAKMA LETTER VAA +11150..11172 ; XID_Continue # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; XID_Continue # Mn MAHAJANI SIGN NUKTA +11176 ; XID_Continue # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; XID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; XID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; XID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; XID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; XID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; XID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; XID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C9..111CC ; XID_Continue # Mn [4] SHARADA 
SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CE ; XID_Continue # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; XID_Continue # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; XID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; XID_Continue # Lo SHARADA EKAM +111DC ; XID_Continue # Lo SHARADA HEADSTROKE +11200..11211 ; XID_Continue # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; XID_Continue # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; XID_Continue # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; XID_Continue # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; XID_Continue # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; XID_Continue # Mn KHOJKI SIGN ANUSVARA +11235 ; XID_Continue # Mc KHOJKI SIGN VIRAMA +11236..11237 ; XID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; XID_Continue # Mn KHOJKI SIGN SUKUN +1123F..11240 ; XID_Continue # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; XID_Continue # Mn KHOJKI VOWEL SIGN VOCALIC R +11280..11286 ; XID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; XID_Continue # Lo MULTANI LETTER GHA +1128A..1128D ; XID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; XID_Continue # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; XID_Continue # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; XID_Continue # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; XID_Continue # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; XID_Continue # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; XID_Continue # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; XID_Continue # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301 ; XID_Continue # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; XID_Continue # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA 
+11305..1130C ; XID_Continue # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; XID_Continue # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; XID_Continue # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; XID_Continue # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; XID_Continue # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; XID_Continue # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C ; XID_Continue # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D ; XID_Continue # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; XID_Continue # Mn GRANTHA VOWEL SIGN II +11341..11344 ; XID_Continue # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; XID_Continue # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; XID_Continue # Lo GRANTHA OM +11357 ; XID_Continue # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; XID_Continue # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; XID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; XID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; XID_Continue # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; XID_Continue # Lo TULU-TIGALARI LETTER EE +1138E ; XID_Continue # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; XID_Continue # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; XID_Continue # Lo TULU-TIGALARI SIGN AVAGRAHA +113B8..113BA ; XID_Continue # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; XID_Continue # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; XID_Continue # Mc 
TULU-TIGALARI VOWEL SIGN EE +113C5 ; XID_Continue # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; XID_Continue # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; XID_Continue # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113CE ; XID_Continue # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; XID_Continue # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; XID_Continue # Mn TULU-TIGALARI CONJOINER +113D1 ; XID_Continue # Lo TULU-TIGALARI REPHA +113D2 ; XID_Continue # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; XID_Continue # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; XID_Continue # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11400..11434 ; XID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; XID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; XID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; XID_Continue # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; XID_Continue # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; XID_Continue # Mc NEWA SIGN VISARGA +11446 ; XID_Continue # Mn NEWA SIGN NUKTA +11447..1144A ; XID_Continue # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +11450..11459 ; XID_Continue # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145E ; XID_Continue # Mn NEWA SANDHI MARK +1145F..11461 ; XID_Continue # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; XID_Continue # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; XID_Continue # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; XID_Continue # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; XID_Continue # Mc TIRHUTA VOWEL SIGN E +114BA ; XID_Continue # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; XID_Continue # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; XID_Continue # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; XID_Continue # Mc TIRHUTA SIGN 
VISARGA +114C2..114C3 ; XID_Continue # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; XID_Continue # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; XID_Continue # Lo TIRHUTA OM +114D0..114D9 ; XID_Continue # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; XID_Continue # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; XID_Continue # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; XID_Continue # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; XID_Continue # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; XID_Continue # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; XID_Continue # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; XID_Continue # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115D8..115DB ; XID_Continue # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; XID_Continue # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; XID_Continue # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; XID_Continue # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; XID_Continue # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; XID_Continue # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; XID_Continue # Mn MODI SIGN ANUSVARA +1163E ; XID_Continue # Mc MODI SIGN VISARGA +1163F..11640 ; XID_Continue # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11644 ; XID_Continue # Lo MODI SIGN HUVA +11650..11659 ; XID_Continue # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; XID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; XID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; XID_Continue # Mc TAKRI SIGN VISARGA +116AD ; XID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; XID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; XID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; XID_Continue 
# Mc TAKRI SIGN VIRAMA +116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA +116B8 ; XID_Continue # Lo TAKRI LETTER ARCHAIC KHA +116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; XID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11700..1171A ; XID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D ; XID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; XID_Continue # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; XID_Continue # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; XID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; XID_Continue # Mc AHOM VOWEL SIGN E +11727..1172B ; XID_Continue # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; XID_Continue # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +11740..11746 ; XID_Continue # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; XID_Continue # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; XID_Continue # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; XID_Continue # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; XID_Continue # Mc DOGRA SIGN VISARGA +11839..1183A ; XID_Continue # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +118A0..118DF ; XID_Continue # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; XID_Continue # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118FF..11906 ; XID_Continue # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; XID_Continue # Lo DIVES AKURU LETTER O +1190C..11913 ; XID_Continue # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; XID_Continue # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; XID_Continue # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; XID_Continue # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; XID_Continue 
# Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; XID_Continue # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; XID_Continue # Mc DIVES AKURU SIGN HALANTA +1193E ; XID_Continue # Mn DIVES AKURU VIRAMA +1193F ; XID_Continue # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; XID_Continue # Mc DIVES AKURU MEDIAL YA +11941 ; XID_Continue # Lo DIVES AKURU INITIAL RA +11942 ; XID_Continue # Mc DIVES AKURU MEDIAL RA +11943 ; XID_Continue # Mn DIVES AKURU SIGN NUKTA +11950..11959 ; XID_Continue # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; XID_Continue # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; XID_Continue # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; XID_Continue # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; XID_Continue # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; XID_Continue # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; XID_Continue # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; XID_Continue # Mn NANDINAGARI SIGN VIRAMA +119E1 ; XID_Continue # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; XID_Continue # Lo NANDINAGARI HEADSTROKE +119E4 ; XID_Continue # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; XID_Continue # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; XID_Continue # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; XID_Continue # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; XID_Continue # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; XID_Continue # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; XID_Continue # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; XID_Continue # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; XID_Continue # Mn 
ZANABAZAR SQUARE SUBJOINER +11A50 ; XID_Continue # Lo SOYOMBO LETTER A +11A51..11A56 ; XID_Continue # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; XID_Continue # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; XID_Continue # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; XID_Continue # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; XID_Continue # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; XID_Continue # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; XID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; XID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; XID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; XID_Continue # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; XID_Continue # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; XID_Continue # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; XID_Continue # Mc BHAIKSUKI SIGN VISARGA +11C3F ; XID_Continue # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; XID_Continue # Lo BHAIKSUKI SIGN AVAGRAHA +11C50..11C59 ; XID_Continue # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C72..11C8F ; XID_Continue # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; XID_Continue # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; XID_Continue # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; XID_Continue # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; XID_Continue # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; XID_Continue # Mn [2] MARCHEN VOWEL SIGN 
U..MARCHEN VOWEL SIGN E +11CB4 ; XID_Continue # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; XID_Continue # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; XID_Continue # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; XID_Continue # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; XID_Continue # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; XID_Continue # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; XID_Continue # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; XID_Continue # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; XID_Continue # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; XID_Continue # Lo MASARAM GONDI REPHA +11D47 ; XID_Continue # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; XID_Continue # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; XID_Continue # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; XID_Continue # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; XID_Continue # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; XID_Continue # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; XID_Continue # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; XID_Continue # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; XID_Continue # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; XID_Continue # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; XID_Continue # Mn GUNJALA GONDI VIRAMA +11D98 ; XID_Continue # Lo GUNJALA GONDI OM +11DA0..11DA9 ; XID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; XID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; XID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; XID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR 
VOWEL SIGN O +11F00..11F01 ; XID_Continue # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; XID_Continue # Lo KAWI SIGN REPHA +11F03 ; XID_Continue # Mc KAWI SIGN VISARGA +11F04..11F10 ; XID_Continue # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; XID_Continue # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; XID_Continue # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; XID_Continue # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; XID_Continue # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; XID_Continue # Mn KAWI VOWEL SIGN EU +11F41 ; XID_Continue # Mc KAWI SIGN KILLER +11F42 ; XID_Continue # Mn KAWI CONJOINER +11F50..11F59 ; XID_Continue # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; XID_Continue # Mn KAWI SIGN NUKTA +11FB0 ; XID_Continue # Lo LISU LETTER YHA +12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; XID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; XID_Continue # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; XID_Continue # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13440 ; XID_Continue # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13441..13446 ; XID_Continue # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455 ; XID_Continue # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +13460..143FA ; XID_Continue # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; XID_Continue # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1611E..16129 ; XID_Continue # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA 
VOWEL LENGTH MARK +1612A..1612C ; XID_Continue # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612F ; XID_Continue # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16130..16139 ; XID_Continue # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE +16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; XID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; XID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A70..16ABE ; XID_Continue # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; XID_Continue # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; XID_Continue # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; XID_Continue # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B00..16B2F ; XID_Continue # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; XID_Continue # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B40..16B43 ; XID_Continue # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B50..16B59 ; XID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B63..16B77 ; XID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; XID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; XID_Continue # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; XID_Continue # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; XID_Continue # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D70..16D79 ; XID_Continue # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16E40..16E7F ; XID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; XID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; XID_Continue # Mn MIAO SIGN CONSONANT MODIFIER 
BAR +16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; XID_Continue # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; XID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; XID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; XID_Continue # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK +16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; XID_Continue # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; XID_Continue # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; XID_Continue # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; XID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; XID_Continue # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; XID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; XID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; XID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; XID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; XID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; XID_Continue # Lo 
[10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9D..1BC9E ; XID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CCF0..1CCF9 ; XID_Continue # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CF00..1CF2D ; XID_Continue # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; XID_Continue # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; XID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; XID_Continue # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; XID_Continue # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; XID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; XID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; XID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D400..1D454 ; XID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; XID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; XID_Continue # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; XID_Continue # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; XID_Continue # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; XID_Continue # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; XID_Continue # L& [7] MATHEMATICAL SCRIPT 
SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; XID_Continue # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; XID_Continue # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; XID_Continue # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; XID_Continue # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; XID_Continue # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; XID_Continue # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; XID_Continue # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; XID_Continue # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; XID_Continue # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; XID_Continue # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; XID_Continue # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; XID_Continue # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; XID_Continue # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; XID_Continue # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; XID_Continue # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; XID_Continue # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; XID_Continue # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; XID_Continue # L& [31] MATHEMATICAL 
SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; XID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; XID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DA00..1DA36 ; XID_Continue # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; XID_Continue # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; XID_Continue # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; XID_Continue # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; XID_Continue # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; XID_Continue # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; XID_Continue # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; XID_Continue # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; XID_Continue # Lm [62] MODIFIER LETTER 
CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; XID_Continue # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E100..1E12C ; XID_Continue # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; XID_Continue # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; XID_Continue # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; XID_Continue # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; XID_Continue # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; XID_Continue # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; XID_Continue # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB ; XID_Continue # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; XID_Continue # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; XID_Continue # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4D0..1E4EA ; XID_Continue # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; XID_Continue # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; XID_Continue # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9 ; XID_Continue # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; XID_Continue # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5EE..1E5EF ; XID_Continue # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E5F0 ; XID_Continue # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; XID_Continue # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E7E0..1E7E6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; XID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; XID_Continue # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; XID_Continue # Lo [197] MENDE 
KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8D0..1E8D6 ; XID_Continue # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943 ; XID_Continue # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; XID_Continue # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; XID_Continue # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; XID_Continue # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1EE00..1EE03 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS 
QAF +1EE61..1EE62 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1FBF0..1FBF9 ; XID_Continue # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; XID_Continue # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; XID_Continue # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY 
IDEOGRAPH-2FA1D +30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 144522 + +# ================================================ + +# Derived Property: Default_Ignorable_Code_Point +# Generated from +# Other_Default_Ignorable_Code_Point +# + Cf (Format characters) +# + Variation_Selector +# - White_Space +# - FFF9..FFFB (Interlinear annotation format characters) +# - 13430..13440 (Egyptian hieroglyph format characters) +# - Prepended_Concatenation_Mark (Exceptional format characters that should be visible) +# +# There are currently no stability guarantees for DICP. However, the +# values of DICP interact with the derivation of XID_Continue +# and NFKC_CF, for which there are stability guarantees. +# Maintainers of this property should note that in the +# unlikely case that the DICP value changes for an existing character +# which is also XID_Continue=Yes, then exceptions must be put +# in place to ensure that the NFKC_CF mapping value for that +# existing character does not change. 
+ +00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN +034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +061C ; Default_Ignorable_Code_Point # Cf ARABIC LETTER MARK +115F..1160 ; Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Default_Ignorable_Code_Point # Cf MONGOLIAN VOWEL SEPARATOR +180F ; Default_Ignorable_Code_Point # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Default_Ignorable_Code_Point # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; Default_Ignorable_Code_Point # Cn +2066..206F ; Default_Ignorable_Code_Point # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +3164 ; Default_Ignorable_Code_Point # Lo HANGUL FILLER +FE00..FE0F ; Default_Ignorable_Code_Point # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FEFF ; Default_Ignorable_Code_Point # Cf ZERO WIDTH NO-BREAK SPACE +FFA0 ; Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Default_Ignorable_Code_Point # Cn [9] .. +1BCA0..1BCA3 ; Default_Ignorable_Code_Point # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; Default_Ignorable_Code_Point # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Default_Ignorable_Code_Point # Cn +E0001 ; Default_Ignorable_Code_Point # Cf LANGUAGE TAG +E0002..E001F ; Default_Ignorable_Code_Point # Cn [30] .. +E0020..E007F ; Default_Ignorable_Code_Point # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; Default_Ignorable_Code_Point # Cn [128] .. 
+E0100..E01EF ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 +E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .. + +# Total code points: 4174 + +# ================================================ + +# Derived Property: Grapheme_Extend +# Generated from: Me + Mn + Other_Grapheme_Extend +# Note: depending on an application's interpretation of Co (private use), +# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither. + +0300..036F ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; Grapheme_Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Grapheme_Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; Grapheme_Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Grapheme_Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; Grapheme_Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Grapheme_Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Grapheme_Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Grapheme_Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Grapheme_Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Grapheme_Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Grapheme_Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; Grapheme_Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Grapheme_Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Grapheme_Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; Grapheme_Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Grapheme_Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Grapheme_Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; 
Grapheme_Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Grapheme_Extend # Mn NKO DANTAYALAN +0816..0819 ; Grapheme_Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Grapheme_Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Grapheme_Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Grapheme_Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Grapheme_Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0897..089F ; Grapheme_Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; Grapheme_Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; Grapheme_Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN OE +093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; Grapheme_Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Grapheme_Extend # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; Grapheme_Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Grapheme_Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Grapheme_Extend # Mn BENGALI SIGN CANDRABINDU +09BC ; Grapheme_Extend # Mn BENGALI SIGN NUKTA +09BE ; Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09C1..09C4 ; Grapheme_Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Grapheme_Extend # Mn BENGALI SIGN VIRAMA +09D7 ; Grapheme_Extend # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Grapheme_Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Grapheme_Extend # Mn BENGALI SANDHI MARK +0A01..0A02 ; Grapheme_Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Grapheme_Extend # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL 
SIGN UU +0A47..0A48 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Grapheme_Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Grapheme_Extend # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Grapheme_Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Grapheme_Extend # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Grapheme_Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Grapheme_Extend # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; Grapheme_Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Grapheme_Extend # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Grapheme_Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; Grapheme_Extend # Mn ORIYA SIGN CANDRABINDU +0B3C ; Grapheme_Extend # Mn ORIYA SIGN NUKTA +0B3E ; Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B3F ; Grapheme_Extend # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; Grapheme_Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; Grapheme_Extend # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Grapheme_Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Grapheme_Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Grapheme_Extend # Mn TAMIL SIGN ANUSVARA +0BBE ; Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BC0 ; Grapheme_Extend # Mn TAMIL VOWEL SIGN II +0BCD ; Grapheme_Extend # Mn TAMIL SIGN VIRAMA +0BD7 ; Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0C00 ; Grapheme_Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Grapheme_Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Grapheme_Extend # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; 
Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Grapheme_Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Grapheme_Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Grapheme_Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Grapheme_Extend # Mn KANNADA SIGN CANDRABINDU +0CBC ; Grapheme_Extend # Mn KANNADA SIGN NUKTA +0CBF ; Grapheme_Extend # Mn KANNADA VOWEL SIGN I +0CC0 ; Grapheme_Extend # Mc KANNADA VOWEL SIGN II +0CC2 ; Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CC6 ; Grapheme_Extend # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Grapheme_Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Grapheme_Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Grapheme_Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Grapheme_Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3E ; Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D41..0D44 ; Grapheme_Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Grapheme_Extend # Mn MALAYALAM SIGN VIRAMA +0D57 ; Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Grapheme_Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Grapheme_Extend # Mn SINHALA SIGN CANDRABINDU +0DCA ; Grapheme_Extend # Mn SINHALA SIGN AL-LAKUNA +0DCF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DD2..0DD4 ; Grapheme_Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Grapheme_Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DDF ; Grapheme_Extend # Mc 
SINHALA VOWEL SIGN GAYANUKITTA +0E31 ; Grapheme_Extend # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Grapheme_Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; Grapheme_Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Grapheme_Extend # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Grapheme_Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; Grapheme_Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; Grapheme_Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Grapheme_Extend # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; Grapheme_Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Grapheme_Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Grapheme_Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Grapheme_Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Grapheme_Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Grapheme_Extend # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Grapheme_Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Grapheme_Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Grapheme_Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Grapheme_Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Grapheme_Extend # Mn MYANMAR 
CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Grapheme_Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Grapheme_Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; Grapheme_Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Grapheme_Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; Grapheme_Extend # Mc TAGALOG SIGN PAMUDPOD +1732..1733 ; Grapheme_Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; Grapheme_Extend # Mc HANUNOO SIGN PAMUDPOD +1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Grapheme_Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Grapheme_Extend # Mn KHMER SIGN ATTHACAN +180B..180D ; Grapheme_Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Grapheme_Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; Grapheme_Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Grapheme_Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Grapheme_Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Grapheme_Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Grapheme_Extend # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; Grapheme_Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Grapheme_Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Grapheme_Extend # Mn BUGINESE VOWEL SIGN AE +1A56 ; Grapheme_Extend # Mn TAI THAM CONSONANT 
SIGN MEDIAL LA +1A58..1A5E ; Grapheme_Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Grapheme_Extend # Mn TAI THAM SIGN SAKOT +1A62 ; Grapheme_Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Grapheme_Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Grapheme_Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Grapheme_Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Grapheme_Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Grapheme_Extend # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Grapheme_Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Grapheme_Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Grapheme_Extend # Mn BALINESE SIGN REREKAN +1B35 ; Grapheme_Extend # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Grapheme_Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Grapheme_Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Grapheme_Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B3D ; Grapheme_Extend # Mc BALINESE VOWEL SIGN LA LENGA TEDUNG +1B42 ; Grapheme_Extend # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Grapheme_Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B6B..1B73 ; Grapheme_Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Grapheme_Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Grapheme_Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Grapheme_Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Grapheme_Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; Grapheme_Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Grapheme_Extend # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; 
Grapheme_Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Grapheme_Extend # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Grapheme_Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; Grapheme_Extend # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C2C..1C33 ; Grapheme_Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Grapheme_Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Grapheme_Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Grapheme_Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Grapheme_Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Grapheme_Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Grapheme_Extend # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Grapheme_Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; Grapheme_Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Grapheme_Extend # Cf ZERO WIDTH NON-JOINER +20D0..20DC ; Grapheme_Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Grapheme_Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Grapheme_Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Grapheme_Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Grapheme_Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Grapheme_Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Grapheme_Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Grapheme_Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Grapheme_Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Grapheme_Extend 
# Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Grapheme_Extend # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Grapheme_Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Grapheme_Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; Grapheme_Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Grapheme_Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Grapheme_Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Grapheme_Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Grapheme_Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Grapheme_Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Grapheme_Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A953 ; Grapheme_Extend # Mc REJANG VIRAMA +A980..A982 ; Grapheme_Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Grapheme_Extend # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Grapheme_Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; Grapheme_Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9C0 ; Grapheme_Extend # Mc JAVANESE PANGKON +A9E5 ; Grapheme_Extend # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Grapheme_Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; 
Grapheme_Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Grapheme_Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Grapheme_Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Grapheme_Extend # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; Grapheme_Extend # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Grapheme_Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Grapheme_Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Grapheme_Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Grapheme_Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Grapheme_Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Grapheme_Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Grapheme_Extend # Mn MEETEI MAYEK VIRAMA +ABE5 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Grapheme_Extend # Mn MEETEI MAYEK APUN IYEK +FB1E ; Grapheme_Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; Grapheme_Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; Grapheme_Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; Grapheme_Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Grapheme_Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Grapheme_Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Grapheme_Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Grapheme_Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Grapheme_Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Grapheme_Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Grapheme_Extend # Mn KHAROSHTHI VIRAMA 
+10AE5..10AE6 ; Grapheme_Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Grapheme_Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Grapheme_Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EAB..10EAC ; Grapheme_Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC..10EFF ; Grapheme_Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; Grapheme_Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Grapheme_Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Grapheme_Extend # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; Grapheme_Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Grapheme_Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Grapheme_Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Grapheme_Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Grapheme_Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Grapheme_Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Grapheme_Extend # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Grapheme_Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Grapheme_Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Grapheme_Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Grapheme_Extend # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Grapheme_Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Grapheme_Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C0 ; Grapheme_Extend # Mc SHARADA SIGN VIRAMA +111C9..111CC ; Grapheme_Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Grapheme_Extend # Mn SHARADA SIGN INVERTED CANDRABINDU 
+1122F..11231 ; Grapheme_Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Grapheme_Extend # Mn KHOJKI SIGN ANUSVARA +11235 ; Grapheme_Extend # Mc KHOJKI SIGN VIRAMA +11236..11237 ; Grapheme_Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Grapheme_Extend # Mn KHOJKI SIGN SUKUN +11241 ; Grapheme_Extend # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; Grapheme_Extend # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Grapheme_Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Grapheme_Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Grapheme_Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133E ; Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11340 ; Grapheme_Extend # Mn GRANTHA VOWEL SIGN II +1134D ; Grapheme_Extend # Mc GRANTHA SIGN VIRAMA +11357 ; Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +11366..1136C ; Grapheme_Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Grapheme_Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8 ; Grapheme_Extend # Mc TULU-TIGALARI VOWEL SIGN AA +113BB..113C0 ; Grapheme_Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Grapheme_Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Grapheme_Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; Grapheme_Extend # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK +113CE ; Grapheme_Extend # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Grapheme_Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Grapheme_Extend # Mn TULU-TIGALARI CONJOINER +113D2 ; Grapheme_Extend # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Grapheme_Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11438..1143F ; Grapheme_Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Grapheme_Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; 
Grapheme_Extend # Mn NEWA SIGN NUKTA +1145E ; Grapheme_Extend # Mn NEWA SANDHI MARK +114B0 ; Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114B3..114B8 ; Grapheme_Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Grapheme_Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BD ; Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +114BF..114C0 ; Grapheme_Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Grapheme_Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF ; Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +115B2..115B5 ; Grapheme_Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Grapheme_Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Grapheme_Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Grapheme_Extend # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Grapheme_Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Grapheme_Extend # Mn MODI SIGN ANUSVARA +1163F..11640 ; Grapheme_Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Grapheme_Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Grapheme_Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Grapheme_Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Grapheme_Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Grapheme_Extend # Mn TAKRI SIGN NUKTA +1171D ; Grapheme_Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Grapheme_Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Grapheme_Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Grapheme_Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Grapheme_Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Grapheme_Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +11930 ; Grapheme_Extend # Mc DIVES AKURU VOWEL SIGN AA +1193B..1193C ; Grapheme_Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU 
SIGN CANDRABINDU +1193D ; Grapheme_Extend # Mc DIVES AKURU SIGN HALANTA +1193E ; Grapheme_Extend # Mn DIVES AKURU VIRAMA +11943 ; Grapheme_Extend # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; Grapheme_Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Grapheme_Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Grapheme_Extend # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Grapheme_Extend # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Grapheme_Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Grapheme_Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Grapheme_Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Grapheme_Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Grapheme_Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Grapheme_Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Grapheme_Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Grapheme_Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Grapheme_Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Grapheme_Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Grapheme_Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Grapheme_Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Grapheme_Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Grapheme_Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Grapheme_Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Grapheme_Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Grapheme_Extend # Mn [2] MASARAM 
GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Grapheme_Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Grapheme_Extend # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; Grapheme_Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Grapheme_Extend # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; Grapheme_Extend # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Grapheme_Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; Grapheme_Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Grapheme_Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Grapheme_Extend # Mn KAWI VOWEL SIGN EU +11F41 ; Grapheme_Extend # Mc KAWI SIGN KILLER +11F42 ; Grapheme_Extend # Mn KAWI CONJOINER +11F5A ; Grapheme_Extend # Mn KAWI SIGN NUKTA +13440 ; Grapheme_Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Grapheme_Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Grapheme_Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Grapheme_Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; Grapheme_Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Grapheme_Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; Grapheme_Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Grapheme_Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; Grapheme_Extend # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; Grapheme_Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1BC9D..1BC9E ; Grapheme_Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; Grapheme_Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; 
Grapheme_Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; Grapheme_Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Grapheme_Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Grapheme_Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Grapheme_Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Grapheme_Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Grapheme_Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Grapheme_Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Grapheme_Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Grapheme_Extend # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Grapheme_Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Grapheme_Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Grapheme_Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Grapheme_Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Grapheme_Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Grapheme_Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Grapheme_Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Grapheme_Extend # Mn 
COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; Grapheme_Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Grapheme_Extend # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Grapheme_Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Grapheme_Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Grapheme_Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; Grapheme_Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Grapheme_Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2193 + +# ================================================ + +# Derived Property: Grapheme_Base +# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend +# Note: depending on an application's interpretation of Co (private use), +# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither. 
+ +0020 ; Grapheme_Base # Zs SPACE +0021..0023 ; Grapheme_Base # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Grapheme_Base # Sc DOLLAR SIGN +0025..0027 ; Grapheme_Base # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Grapheme_Base # Ps LEFT PARENTHESIS +0029 ; Grapheme_Base # Pe RIGHT PARENTHESIS +002A ; Grapheme_Base # Po ASTERISK +002B ; Grapheme_Base # Sm PLUS SIGN +002C ; Grapheme_Base # Po COMMA +002D ; Grapheme_Base # Pd HYPHEN-MINUS +002E..002F ; Grapheme_Base # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Grapheme_Base # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Grapheme_Base # Po [2] COLON..SEMICOLON +003C..003E ; Grapheme_Base # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Grapheme_Base # Po [2] QUESTION MARK..COMMERCIAL AT +0041..005A ; Grapheme_Base # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005B ; Grapheme_Base # Ps LEFT SQUARE BRACKET +005C ; Grapheme_Base # Po REVERSE SOLIDUS +005D ; Grapheme_Base # Pe RIGHT SQUARE BRACKET +005E ; Grapheme_Base # Sk CIRCUMFLEX ACCENT +005F ; Grapheme_Base # Pc LOW LINE +0060 ; Grapheme_Base # Sk GRAVE ACCENT +0061..007A ; Grapheme_Base # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +007B ; Grapheme_Base # Ps LEFT CURLY BRACKET +007C ; Grapheme_Base # Sm VERTICAL LINE +007D ; Grapheme_Base # Pe RIGHT CURLY BRACKET +007E ; Grapheme_Base # Sm TILDE +00A0 ; Grapheme_Base # Zs NO-BREAK SPACE +00A1 ; Grapheme_Base # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Grapheme_Base # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Grapheme_Base # So BROKEN BAR +00A7 ; Grapheme_Base # Po SECTION SIGN +00A8 ; Grapheme_Base # Sk DIAERESIS +00A9 ; Grapheme_Base # So COPYRIGHT SIGN +00AA ; Grapheme_Base # Lo FEMININE ORDINAL INDICATOR +00AB ; Grapheme_Base # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Grapheme_Base # Sm NOT SIGN +00AE ; Grapheme_Base # So REGISTERED SIGN +00AF ; Grapheme_Base # Sk MACRON +00B0 ; Grapheme_Base # So DEGREE SIGN +00B1 ; Grapheme_Base # Sm PLUS-MINUS SIGN +00B2..00B3 ; Grapheme_Base # No 
[2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; Grapheme_Base # Sk ACUTE ACCENT +00B5 ; Grapheme_Base # L& MICRO SIGN +00B6..00B7 ; Grapheme_Base # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; Grapheme_Base # Sk CEDILLA +00B9 ; Grapheme_Base # No SUPERSCRIPT ONE +00BA ; Grapheme_Base # Lo MASCULINE ORDINAL INDICATOR +00BB ; Grapheme_Base # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; Grapheme_Base # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; Grapheme_Base # Po INVERTED QUESTION MARK +00C0..00D6 ; Grapheme_Base # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D7 ; Grapheme_Base # Sm MULTIPLICATION SIGN +00D8..00F6 ; Grapheme_Base # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F7 ; Grapheme_Base # Sm DIVISION SIGN +00F8..01BA ; Grapheme_Base # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Grapheme_Base # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Grapheme_Base # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Grapheme_Base # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Grapheme_Base # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Grapheme_Base # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; Grapheme_Base # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Grapheme_Base # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Grapheme_Base # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Grapheme_Base # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Grapheme_Base # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL 
STOP +02E5..02EB ; Grapheme_Base # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Grapheme_Base # Lm MODIFIER LETTER VOICING +02ED ; Grapheme_Base # Sk MODIFIER LETTER UNASPIRATED +02EE ; Grapheme_Base # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Grapheme_Base # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0370..0373 ; Grapheme_Base # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; Grapheme_Base # Lm GREEK NUMERAL SIGN +0375 ; Grapheme_Base # Sk GREEK LOWER NUMERAL SIGN +0376..0377 ; Grapheme_Base # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Grapheme_Base # Lm GREEK YPOGEGRAMMENI +037B..037D ; Grapheme_Base # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037E ; Grapheme_Base # Po GREEK QUESTION MARK +037F ; Grapheme_Base # L& GREEK CAPITAL LETTER YOT +0384..0385 ; Grapheme_Base # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0386 ; Grapheme_Base # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; Grapheme_Base # Po GREEK ANO TELEIA +0388..038A ; Grapheme_Base # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Grapheme_Base # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Grapheme_Base # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; Grapheme_Base # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F6 ; Grapheme_Base # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..0481 ; Grapheme_Base # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0482 ; Grapheme_Base # So CYRILLIC THOUSANDS SIGN +048A..052F ; Grapheme_Base # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Grapheme_Base # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Grapheme_Base # Lm ARMENIAN 
MODIFIER LETTER LEFT HALF RING +055A..055F ; Grapheme_Base # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; Grapheme_Base # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; Grapheme_Base # Po ARMENIAN FULL STOP +058A ; Grapheme_Base # Pd ARMENIAN HYPHEN +058D..058E ; Grapheme_Base # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +058F ; Grapheme_Base # Sc ARMENIAN DRAM SIGN +05BE ; Grapheme_Base # Pd HEBREW PUNCTUATION MAQAF +05C0 ; Grapheme_Base # Po HEBREW PUNCTUATION PASEQ +05C3 ; Grapheme_Base # Po HEBREW PUNCTUATION SOF PASUQ +05C6 ; Grapheme_Base # Po HEBREW PUNCTUATION NUN HAFUKHA +05D0..05EA ; Grapheme_Base # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Grapheme_Base # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; Grapheme_Base # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +0606..0608 ; Grapheme_Base # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A ; Grapheme_Base # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B ; Grapheme_Base # Sc AFGHANI SIGN +060C..060D ; Grapheme_Base # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR +060E..060F ; Grapheme_Base # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +061B ; Grapheme_Base # Po ARABIC SEMICOLON +061D..061F ; Grapheme_Base # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F ; Grapheme_Base # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; Grapheme_Base # Lm ARABIC TATWEEL +0641..064A ; Grapheme_Base # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +0660..0669 ; Grapheme_Base # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066A..066D ; Grapheme_Base # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F ; Grapheme_Base # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; Grapheme_Base # Lo [99] ARABIC LETTER ALEF 
WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; Grapheme_Base # Po ARABIC FULL STOP +06D5 ; Grapheme_Base # Lo ARABIC LETTER AE +06DE ; Grapheme_Base # So ARABIC START OF RUB EL HIZB +06E5..06E6 ; Grapheme_Base # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E9 ; Grapheme_Base # So ARABIC PLACE OF SAJDAH +06EE..06EF ; Grapheme_Base # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; Grapheme_Base # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; Grapheme_Base # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; Grapheme_Base # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; Grapheme_Base # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D ; Grapheme_Base # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +0710 ; Grapheme_Base # Lo SYRIAC LETTER ALAPH +0712..072F ; Grapheme_Base # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; Grapheme_Base # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; Grapheme_Base # Lo THAANA LETTER NAA +07C0..07C9 ; Grapheme_Base # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; Grapheme_Base # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; Grapheme_Base # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6 ; Grapheme_Base # So NKO SYMBOL OO DENNEN +07F7..07F9 ; Grapheme_Base # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA ; Grapheme_Base # Lm NKO LAJANYALAN +07FE..07FF ; Grapheme_Base # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815 ; Grapheme_Base # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; Grapheme_Base # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; Grapheme_Base # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; Grapheme_Base # Lm SAMARITAN MODIFIER LETTER I +0830..083E ; Grapheme_Base # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0840..0858 ; 
Grapheme_Base # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +085E ; Grapheme_Base # Po MANDAIC PUNCTUATION +0860..086A ; Grapheme_Base # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; Grapheme_Base # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; Grapheme_Base # Sk ARABIC RAISED ROUND DOT +0889..088E ; Grapheme_Base # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; Grapheme_Base # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; Grapheme_Base # Lm ARABIC SMALL FARSI YEH +0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Grapheme_Base # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093B ; Grapheme_Base # Mc DEVANAGARI VOWEL SIGN OOE +093D ; Grapheme_Base # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Grapheme_Base # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; Grapheme_Base # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Grapheme_Base # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; Grapheme_Base # Lo DEVANAGARI OM +0958..0961 ; Grapheme_Base # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0964..0965 ; Grapheme_Base # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; Grapheme_Base # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; Grapheme_Base # Po DEVANAGARI ABBREVIATION SIGN +0971 ; Grapheme_Base # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; Grapheme_Base # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0982..0983 ; Grapheme_Base # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Grapheme_Base # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Grapheme_Base # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Grapheme_Base # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Grapheme_Base # Lo [7] BENGALI LETTER PA..BENGALI LETTER 
RA +09B2 ; Grapheme_Base # Lo BENGALI LETTER LA +09B6..09B9 ; Grapheme_Base # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; Grapheme_Base # Lo BENGALI SIGN AVAGRAHA +09BF..09C0 ; Grapheme_Base # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II +09C7..09C8 ; Grapheme_Base # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Grapheme_Base # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; Grapheme_Base # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; Grapheme_Base # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Grapheme_Base # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E6..09EF ; Grapheme_Base # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; Grapheme_Base # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3 ; Grapheme_Base # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9 ; Grapheme_Base # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; Grapheme_Base # So BENGALI ISSHAR +09FB ; Grapheme_Base # Sc BENGALI GANDA MARK +09FC ; Grapheme_Base # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; Grapheme_Base # Po BENGALI ABBREVIATION SIGN +0A03 ; Grapheme_Base # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Grapheme_Base # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Grapheme_Base # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Grapheme_Base # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Grapheme_Base # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Grapheme_Base # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Grapheme_Base # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Grapheme_Base # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; Grapheme_Base # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A59..0A5C ; Grapheme_Base # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Grapheme_Base # Lo GURMUKHI LETTER FA 
+0A66..0A6F ; Grapheme_Base # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A72..0A74 ; Grapheme_Base # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A76 ; Grapheme_Base # Po GURMUKHI ABBREVIATION SIGN +0A83 ; Grapheme_Base # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Grapheme_Base # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Grapheme_Base # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Grapheme_Base # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Grapheme_Base # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Grapheme_Base # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Grapheme_Base # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; Grapheme_Base # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Grapheme_Base # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; Grapheme_Base # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Grapheme_Base # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; Grapheme_Base # Lo GUJARATI OM +0AE0..0AE1 ; Grapheme_Base # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE6..0AEF ; Grapheme_Base # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Grapheme_Base # Po GUJARATI ABBREVIATION SIGN +0AF1 ; Grapheme_Base # Sc GUJARATI RUPEE SIGN +0AF9 ; Grapheme_Base # Lo GUJARATI LETTER ZHA +0B02..0B03 ; Grapheme_Base # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Grapheme_Base # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Grapheme_Base # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Grapheme_Base # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Grapheme_Base # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Grapheme_Base # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Grapheme_Base # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; Grapheme_Base # Lo ORIYA SIGN AVAGRAHA +0B40 ; Grapheme_Base # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; Grapheme_Base # Mc [2] ORIYA VOWEL SIGN 
E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Grapheme_Base # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B5C..0B5D ; Grapheme_Base # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Grapheme_Base # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B66..0B6F ; Grapheme_Base # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; Grapheme_Base # So ORIYA ISSHAR +0B71 ; Grapheme_Base # Lo ORIYA LETTER WA +0B72..0B77 ; Grapheme_Base # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B83 ; Grapheme_Base # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Grapheme_Base # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Grapheme_Base # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Grapheme_Base # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Grapheme_Base # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Grapheme_Base # Lo TAMIL LETTER JA +0B9E..0B9F ; Grapheme_Base # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Grapheme_Base # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; Grapheme_Base # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Grapheme_Base # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBF ; Grapheme_Base # Mc TAMIL VOWEL SIGN I +0BC1..0BC2 ; Grapheme_Base # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Grapheme_Base # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Grapheme_Base # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; Grapheme_Base # Lo TAMIL OM +0BE6..0BEF ; Grapheme_Base # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Grapheme_Base # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8 ; Grapheme_Base # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9 ; Grapheme_Base # Sc TAMIL RUPEE SIGN +0BFA ; Grapheme_Base # So TAMIL NUMBER SIGN +0C01..0C03 ; Grapheme_Base # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; Grapheme_Base # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Grapheme_Base # Lo [3] TELUGU LETTER 
E..TELUGU LETTER AI +0C12..0C28 ; Grapheme_Base # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; Grapheme_Base # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; Grapheme_Base # Lo TELUGU SIGN AVAGRAHA +0C41..0C44 ; Grapheme_Base # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C58..0C5A ; Grapheme_Base # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; Grapheme_Base # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; Grapheme_Base # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C66..0C6F ; Grapheme_Base # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; Grapheme_Base # Po TELUGU SIGN SIDDHAM +0C78..0C7E ; Grapheme_Base # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F ; Grapheme_Base # So TELUGU SIGN TUUMU +0C80 ; Grapheme_Base # Lo KANNADA SIGN SPACING CANDRABINDU +0C82..0C83 ; Grapheme_Base # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; Grapheme_Base # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; Grapheme_Base # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Grapheme_Base # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Grapheme_Base # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Grapheme_Base # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Grapheme_Base # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; Grapheme_Base # Lo KANNADA SIGN AVAGRAHA +0CBE ; Grapheme_Base # Mc KANNADA VOWEL SIGN AA +0CC1 ; Grapheme_Base # Mc KANNADA VOWEL SIGN U +0CC3..0CC4 ; Grapheme_Base # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR +0CDD..0CDE ; Grapheme_Base # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; Grapheme_Base # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE6..0CEF ; Grapheme_Base # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; Grapheme_Base # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3 ; Grapheme_Base # 
Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D02..0D03 ; Grapheme_Base # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; Grapheme_Base # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Grapheme_Base # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; Grapheme_Base # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; Grapheme_Base # Lo MALAYALAM SIGN AVAGRAHA +0D3F..0D40 ; Grapheme_Base # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II +0D46..0D48 ; Grapheme_Base # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Grapheme_Base # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; Grapheme_Base # Lo MALAYALAM LETTER DOT REPH +0D4F ; Grapheme_Base # So MALAYALAM SIGN PARA +0D54..0D56 ; Grapheme_Base # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D58..0D5E ; Grapheme_Base # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; Grapheme_Base # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D66..0D6F ; Grapheme_Base # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; Grapheme_Base # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; Grapheme_Base # So MALAYALAM DATE MARK +0D7A..0D7F ; Grapheme_Base # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D82..0D83 ; Grapheme_Base # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Grapheme_Base # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Grapheme_Base # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Grapheme_Base # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Grapheme_Base # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Grapheme_Base # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DD0..0DD1 ; Grapheme_Base # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA 
VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDE ; Grapheme_Base # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA +0DE6..0DEF ; Grapheme_Base # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; Grapheme_Base # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; Grapheme_Base # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30 ; Grapheme_Base # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; Grapheme_Base # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E3F ; Grapheme_Base # Sc THAI CURRENCY SYMBOL BAHT +0E40..0E45 ; Grapheme_Base # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Grapheme_Base # Lm THAI CHARACTER MAIYAMOK +0E4F ; Grapheme_Base # Po THAI CHARACTER FONGMAN +0E50..0E59 ; Grapheme_Base # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; Grapheme_Base # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82 ; Grapheme_Base # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Grapheme_Base # Lo LAO LETTER KHO TAM +0E86..0E8A ; Grapheme_Base # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; Grapheme_Base # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; Grapheme_Base # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; Grapheme_Base # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; Grapheme_Base # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; Grapheme_Base # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Grapheme_Base # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Grapheme_Base # Lm LAO KO LA +0ED0..0ED9 ; Grapheme_Base # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; Grapheme_Base # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; Grapheme_Base # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; Grapheme_Base # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; Grapheme_Base # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; 
Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Grapheme_Base # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Grapheme_Base # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F1A..0F1F ; Grapheme_Base # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; Grapheme_Base # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; Grapheme_Base # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; Grapheme_Base # So TIBETAN MARK BSDUS RTAGS +0F36 ; Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F38 ; Grapheme_Base # So TIBETAN MARK CHE MGO +0F3A ; Grapheme_Base # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; Grapheme_Base # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; Grapheme_Base # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; Grapheme_Base # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F ; Grapheme_Base # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; Grapheme_Base # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Grapheme_Base # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F7F ; Grapheme_Base # Mc TIBETAN SIGN RNAM BCAD +0F85 ; Grapheme_Base # Po TIBETAN MARK PALUTA +0F88..0F8C ; Grapheme_Base # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0FBE..0FC5 ; Grapheme_Base # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC7..0FCC ; Grapheme_Base # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; Grapheme_Base # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; Grapheme_Base # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8 ; Grapheme_Base # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA ; Grapheme_Base # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A ; Grapheme_Base # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU 
+102B..102C ; Grapheme_Base # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +1031 ; Grapheme_Base # Mc MYANMAR VOWEL SIGN E +1038 ; Grapheme_Base # Mc MYANMAR SIGN VISARGA +103B..103C ; Grapheme_Base # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103F ; Grapheme_Base # Lo MYANMAR LETTER GREAT SA +1040..1049 ; Grapheme_Base # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; Grapheme_Base # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; Grapheme_Base # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Grapheme_Base # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +105A..105D ; Grapheme_Base # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; Grapheme_Base # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Grapheme_Base # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Grapheme_Base # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; Grapheme_Base # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Grapheme_Base # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; Grapheme_Base # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1083..1084 ; Grapheme_Base # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1087..108C ; Grapheme_Base # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108E ; Grapheme_Base # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Grapheme_Base # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; Grapheme_Base # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; Grapheme_Base # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109E..109F ; Grapheme_Base # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +10A0..10C5 ; Grapheme_Base # L& [38] GEORGIAN CAPITAL 
LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER YN +10CD ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Grapheme_Base # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB ; Grapheme_Base # Po GEORGIAN PARAGRAPH SEPARATOR +10FC ; Grapheme_Base # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Grapheme_Base # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; Grapheme_Base # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Grapheme_Base # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Grapheme_Base # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Grapheme_Base # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Grapheme_Base # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Grapheme_Base # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1360..1368 ; Grapheme_Base # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; Grapheme_Base # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; Grapheme_Base # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399 ; Grapheme_Base # So [10] 
ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +13A0..13F5 ; Grapheme_Base # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Grapheme_Base # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1400 ; Grapheme_Base # Pd CANADIAN SYLLABICS HYPHEN +1401..166C ; Grapheme_Base # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; Grapheme_Base # So CANADIAN SYLLABICS CHI SIGN +166E ; Grapheme_Base # Po CANADIAN SYLLABICS FULL STOP +166F..167F ; Grapheme_Base # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1680 ; Grapheme_Base # Zs OGHAM SPACE MARK +1681..169A ; Grapheme_Base # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B ; Grapheme_Base # Ps OGHAM FEATHER MARK +169C ; Grapheme_Base # Pe OGHAM REVERSED FEATHER MARK +16A0..16EA ; Grapheme_Base # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED ; Grapheme_Base # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0 ; Grapheme_Base # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; Grapheme_Base # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; Grapheme_Base # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; Grapheme_Base # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1735..1736 ; Grapheme_Base # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751 ; Grapheme_Base # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; Grapheme_Base # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Grapheme_Base # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; Grapheme_Base # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; Grapheme_Base # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; Grapheme_Base # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; Grapheme_Base # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D4..17D6 ; Grapheme_Base # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; Grapheme_Base # Lm 
KHMER SIGN LEK TOO +17D8..17DA ; Grapheme_Base # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DB ; Grapheme_Base # Sc KHMER CURRENCY SYMBOL RIEL +17DC ; Grapheme_Base # Lo KHMER SIGN AVAKRAHASANYA +17E0..17E9 ; Grapheme_Base # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +17F0..17F9 ; Grapheme_Base # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +1800..1805 ; Grapheme_Base # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806 ; Grapheme_Base # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; Grapheme_Base # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +1810..1819 ; Grapheme_Base # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; Grapheme_Base # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Grapheme_Base # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; Grapheme_Base # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; Grapheme_Base # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; Grapheme_Base # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; Grapheme_Base # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; Grapheme_Base # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; Grapheme_Base # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1923..1926 ; Grapheme_Base # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; Grapheme_Base # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Grapheme_Base # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; Grapheme_Base # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1940 ; Grapheme_Base # So LIMBU SIGN LOO +1944..1945 ; Grapheme_Base # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F ; Grapheme_Base # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; Grapheme_Base # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 
; Grapheme_Base # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; Grapheme_Base # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; Grapheme_Base # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; Grapheme_Base # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; Grapheme_Base # No NEW TAI LUE THAM DIGIT ONE +19DE..19FF ; Grapheme_Base # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC +1A00..1A16 ; Grapheme_Base # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A19..1A1A ; Grapheme_Base # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1E..1A1F ; Grapheme_Base # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54 ; Grapheme_Base # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Grapheme_Base # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; Grapheme_Base # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A61 ; Grapheme_Base # Mc TAI THAM VOWEL SIGN A +1A63..1A64 ; Grapheme_Base # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A6D..1A72 ; Grapheme_Base # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A80..1A89 ; Grapheme_Base # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Grapheme_Base # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; Grapheme_Base # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; Grapheme_Base # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; Grapheme_Base # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B04 ; Grapheme_Base # Mc BALINESE SIGN BISAH +1B05..1B33 ; Grapheme_Base # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B3E..1B41 ; Grapheme_Base # Mc [4] BALINESE VOWEL SIGN TALING..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B45..1B4C ; Grapheme_Base # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B4E..1B4F ; Grapheme_Base # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN +1B50..1B59 
; Grapheme_Base # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; Grapheme_Base # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; Grapheme_Base # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B74..1B7C ; Grapheme_Base # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7F ; Grapheme_Base # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK +1B82 ; Grapheme_Base # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Grapheme_Base # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Grapheme_Base # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; Grapheme_Base # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAE..1BAF ; Grapheme_Base # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; Grapheme_Base # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; Grapheme_Base # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; Grapheme_Base # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; Grapheme_Base # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; Grapheme_Base # Mc BATAK VOWEL SIGN U +1BFC..1BFF ; Grapheme_Base # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23 ; Grapheme_Base # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Grapheme_Base # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; Grapheme_Base # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C3B..1C3F ; Grapheme_Base # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; Grapheme_Base # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; Grapheme_Base # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; Grapheme_Base # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; Grapheme_Base # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Grapheme_Base # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; Grapheme_Base # Po [2] OL 
CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C8A ; Grapheme_Base # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE +1C90..1CBA ; Grapheme_Base # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Grapheme_Base # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7 ; Grapheme_Base # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD3 ; Grapheme_Base # Po VEDIC SIGN NIHSHVASA +1CE1 ; Grapheme_Base # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; Grapheme_Base # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; Grapheme_Base # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Grapheme_Base # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; Grapheme_Base # Mc VEDIC SIGN ATIKRAMA +1CFA ; Grapheme_Base # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; Grapheme_Base # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Grapheme_Base # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Grapheme_Base # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Grapheme_Base # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Grapheme_Base # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Grapheme_Base # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; Grapheme_Base # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Grapheme_Base # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Grapheme_Base # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Grapheme_Base # L& 
[6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Grapheme_Base # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Grapheme_Base # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Grapheme_Base # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Grapheme_Base # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Grapheme_Base # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Grapheme_Base # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Grapheme_Base # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD ; Grapheme_Base # Sk GREEK KORONIS +1FBE ; Grapheme_Base # L& GREEK PROSGEGRAMMENI +1FBF..1FC1 ; Grapheme_Base # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4 ; Grapheme_Base # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Grapheme_Base # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF ; Grapheme_Base # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3 ; Grapheme_Base # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Grapheme_Base # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF ; Grapheme_Base # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC ; Grapheme_Base # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF ; Grapheme_Base # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FF2..1FF4 ; Grapheme_Base # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER 
OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Grapheme_Base # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE ; Grapheme_Base # Sk [2] GREEK OXIA..GREEK DASIA +2000..200A ; Grapheme_Base # Zs [11] EN QUAD..HAIR SPACE +2010..2015 ; Grapheme_Base # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Grapheme_Base # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Grapheme_Base # Pi LEFT SINGLE QUOTATION MARK +2019 ; Grapheme_Base # Pf RIGHT SINGLE QUOTATION MARK +201A ; Grapheme_Base # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Grapheme_Base # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Grapheme_Base # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Grapheme_Base # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Grapheme_Base # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Grapheme_Base # Po [8] DAGGER..HYPHENATION POINT +202F ; Grapheme_Base # Zs NARROW NO-BREAK SPACE +2030..2038 ; Grapheme_Base # Po [9] PER MILLE SIGN..CARET +2039 ; Grapheme_Base # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Grapheme_Base # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Grapheme_Base # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; Grapheme_Base # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; Grapheme_Base # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Grapheme_Base # Sm FRACTION SLASH +2045 ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Grapheme_Base # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Grapheme_Base # Sm COMMERCIAL MINUS SIGN +2053 ; Grapheme_Base # Po SWUNG DASH +2054 ; Grapheme_Base # Pc INVERTED UNDERTIE +2055..205E ; Grapheme_Base # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F ; Grapheme_Base # Zs MEDIUM MATHEMATICAL SPACE +2070 ; Grapheme_Base # No SUPERSCRIPT ZERO +2071 ; Grapheme_Base # Lm SUPERSCRIPT LATIN SMALL LETTER I +2074..2079 
; Grapheme_Base # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; Grapheme_Base # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Grapheme_Base # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Grapheme_Base # Pe SUPERSCRIPT RIGHT PARENTHESIS +207F ; Grapheme_Base # Lm SUPERSCRIPT LATIN SMALL LETTER N +2080..2089 ; Grapheme_Base # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; Grapheme_Base # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Grapheme_Base # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS +2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20A0..20C0 ; Grapheme_Base # Sc [33] EURO-CURRENCY SIGN..SOM SIGN +2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL C +2103..2106 ; Grapheme_Base # So [4] DEGREE CELSIUS..CADA UNA +2107 ; Grapheme_Base # L& EULER CONSTANT +2108..2109 ; Grapheme_Base # So [2] SCRUPLE..DEGREE FAHRENHEIT +210A..2113 ; Grapheme_Base # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2114 ; Grapheme_Base # So L B BAR SYMBOL +2115 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL N +2116..2117 ; Grapheme_Base # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +2118 ; Grapheme_Base # Sm SCRIPT CAPITAL P +2119..211D ; Grapheme_Base # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2123 ; Grapheme_Base # So [6] PRESCRIPTION TAKE..VERSICLE +2124 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL Z +2125 ; Grapheme_Base # So OUNCE SIGN +2126 ; Grapheme_Base # L& OHM SIGN +2127 ; Grapheme_Base # So INVERTED OHM SIGN +2128 ; Grapheme_Base # L& BLACK-LETTER CAPITAL Z +2129 ; Grapheme_Base # So TURNED GREEK SMALL LETTER IOTA +212A..212D ; Grapheme_Base # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; Grapheme_Base # So ESTIMATED SYMBOL +212F..2134 ; Grapheme_Base # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; Grapheme_Base # Lo [4] ALEF SYMBOL..DALET SYMBOL 
+2139 ; Grapheme_Base # L& INFORMATION SOURCE +213A..213B ; Grapheme_Base # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F ; Grapheme_Base # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Grapheme_Base # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Grapheme_Base # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A ; Grapheme_Base # So PROPERTY LINE +214B ; Grapheme_Base # Sm TURNED AMPERSAND +214C..214D ; Grapheme_Base # So [2] PER SIGN..AKTIESELSKAB +214E ; Grapheme_Base # L& TURNED SMALL F +214F ; Grapheme_Base # So SYMBOL FOR SAMARITAN SOURCE +2150..215F ; Grapheme_Base # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2160..2182 ; Grapheme_Base # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Grapheme_Base # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Grapheme_Base # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2189 ; Grapheme_Base # No VULGAR FRACTION ZERO THIRDS +218A..218B ; Grapheme_Base # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; Grapheme_Base # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Grapheme_Base # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Grapheme_Base # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Grapheme_Base # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Grapheme_Base # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Grapheme_Base # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Grapheme_Base # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Grapheme_Base # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Grapheme_Base # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Grapheme_Base # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Grapheme_Base # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Grapheme_Base # So [31] DOWNWARDS ZIGZAG 
ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Grapheme_Base # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Grapheme_Base # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Grapheme_Base # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Grapheme_Base # So DOWNWARDS DOUBLE ARROW +21D4 ; Grapheme_Base # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Grapheme_Base # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Grapheme_Base # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Grapheme_Base # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Grapheme_Base # Ps LEFT CEILING +2309 ; Grapheme_Base # Pe RIGHT CEILING +230A ; Grapheme_Base # Ps LEFT FLOOR +230B ; Grapheme_Base # Pe RIGHT FLOOR +230C..231F ; Grapheme_Base # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Grapheme_Base # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Grapheme_Base # So [7] FROWN..KEYBOARD +2329 ; Grapheme_Base # Ps LEFT-POINTING ANGLE BRACKET +232A ; Grapheme_Base # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Grapheme_Base # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Grapheme_Base # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Grapheme_Base # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Grapheme_Base # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Grapheme_Base # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Grapheme_Base # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2429 ; Grapheme_Base # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM +2440..244A ; Grapheme_Base # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B ; Grapheme_Base # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9 ; Grapheme_Base # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +24EA..24FF ; Grapheme_Base # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; 
Grapheme_Base # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Grapheme_Base # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Grapheme_Base # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Grapheme_Base # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Grapheme_Base # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Grapheme_Base # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Grapheme_Base # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Grapheme_Base # Sm MUSIC SHARP SIGN +2670..2767 ; Grapheme_Base # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Grapheme_Base # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Grapheme_Base # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Grapheme_Base # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Grapheme_Base # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Grapheme_Base # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Grapheme_Base # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Grapheme_Base # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Grapheme_Base # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Grapheme_Base # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Grapheme_Base # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Grapheme_Base # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Grapheme_Base # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Grapheme_Base # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Grapheme_Base # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; Grapheme_Base # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794..27BF ; Grapheme_Base # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Grapheme_Base # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Grapheme_Base # Ps LEFT S-SHAPED 
BAG DELIMITER +27C6 ; Grapheme_Base # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Grapheme_Base # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Grapheme_Base # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Grapheme_Base # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Grapheme_Base # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Grapheme_Base # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Grapheme_Base # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Grapheme_Base # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Grapheme_Base # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Grapheme_Base # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Grapheme_Base # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Grapheme_Base # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Grapheme_Base # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Grapheme_Base # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Grapheme_Base # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Grapheme_Base # Ps LEFT WHITE CURLY BRACKET +2984 ; Grapheme_Base # Pe RIGHT WHITE CURLY BRACKET +2985 ; Grapheme_Base # Ps LEFT WHITE PARENTHESIS +2986 ; Grapheme_Base # Pe RIGHT WHITE PARENTHESIS +2987 ; Grapheme_Base # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Grapheme_Base # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Grapheme_Base # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Grapheme_Base # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; 
Grapheme_Base # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Grapheme_Base # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Grapheme_Base # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Grapheme_Base # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Grapheme_Base # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Grapheme_Base # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Grapheme_Base # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Grapheme_Base # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Grapheme_Base # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Grapheme_Base # Ps LEFT WIGGLY FENCE +29D9 ; Grapheme_Base # Pe RIGHT WIGGLY FENCE +29DA ; Grapheme_Base # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Grapheme_Base # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Grapheme_Base # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Grapheme_Base # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Grapheme_Base # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Grapheme_Base # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Grapheme_Base # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Grapheme_Base # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Grapheme_Base # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Grapheme_Base # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Grapheme_Base # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; Grapheme_Base # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; Grapheme_Base # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2C00..2C7B ; Grapheme_Base # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Grapheme_Base # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; Grapheme_Base # L& [103] LATIN CAPITAL LETTER S WITH SWASH 
TAIL..COPTIC SYMBOL KAI +2CE5..2CEA ; Grapheme_Base # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; Grapheme_Base # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Grapheme_Base # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2CF9..2CFC ; Grapheme_Base # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; Grapheme_Base # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; Grapheme_Base # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2D00..2D25 ; Grapheme_Base # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Grapheme_Base # L& GEORGIAN SMALL LETTER YN +2D2D ; Grapheme_Base # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Grapheme_Base # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Grapheme_Base # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; Grapheme_Base # Po TIFINAGH SEPARATOR MARK +2D80..2D96 ; Grapheme_Base # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2E00..2E01 ; Grapheme_Base # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Grapheme_Base # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Grapheme_Base # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Grapheme_Base # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; 
Grapheme_Base # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Grapheme_Base # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Grapheme_Base # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Grapheme_Base # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Grapheme_Base # Po RAISED SQUARE +2E0C ; Grapheme_Base # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Grapheme_Base # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Grapheme_Base # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Grapheme_Base # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Grapheme_Base # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Grapheme_Base # Pd HYPHEN WITH DIAERESIS +2E1B ; Grapheme_Base # Po TILDE WITH RING ABOVE +2E1C ; Grapheme_Base # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Grapheme_Base # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Grapheme_Base # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Grapheme_Base # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Grapheme_Base # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Grapheme_Base # Ps TOP LEFT HALF BRACKET +2E23 ; Grapheme_Base # Pe TOP RIGHT HALF BRACKET +2E24 ; Grapheme_Base # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Grapheme_Base # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Grapheme_Base # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Grapheme_Base # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Grapheme_Base # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Grapheme_Base # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Grapheme_Base # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Grapheme_Base # Lm VERTICAL TILDE +2E30..2E39 ; Grapheme_Base # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Grapheme_Base # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Grapheme_Base # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Grapheme_Base # Pd DOUBLE HYPHEN +2E41 ; Grapheme_Base # Po REVERSED COMMA +2E42 ; Grapheme_Base # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; Grapheme_Base # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER 
+2E50..2E51 ; Grapheme_Base # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; Grapheme_Base # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Grapheme_Base # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Grapheme_Base # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Grapheme_Base # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Grapheme_Base # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; Grapheme_Base # Pd OBLIQUE HYPHEN +2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFF ; Grapheme_Base # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION +3000 ; Grapheme_Base # Zs IDEOGRAPHIC SPACE +3001..3003 ; Grapheme_Base # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; Grapheme_Base # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3005 ; Grapheme_Base # Lm IDEOGRAPHIC ITERATION MARK +3006 ; Grapheme_Base # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Grapheme_Base # Nl IDEOGRAPHIC NUMBER ZERO +3008 ; Grapheme_Base # Ps LEFT ANGLE BRACKET +3009 ; Grapheme_Base # Pe RIGHT ANGLE BRACKET +300A ; Grapheme_Base # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Grapheme_Base # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Grapheme_Base # Ps LEFT CORNER BRACKET +300D ; Grapheme_Base # Pe RIGHT CORNER BRACKET +300E ; Grapheme_Base # Ps LEFT WHITE CORNER BRACKET +300F ; Grapheme_Base # Pe RIGHT WHITE CORNER BRACKET +3010 ; Grapheme_Base # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Grapheme_Base # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Grapheme_Base # So [2] POSTAL MARK..GETA MARK +3014 ; 
Grapheme_Base # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Grapheme_Base # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Grapheme_Base # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Grapheme_Base # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Grapheme_Base # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Grapheme_Base # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Grapheme_Base # Ps LEFT WHITE SQUARE BRACKET +301B ; Grapheme_Base # Pe RIGHT WHITE SQUARE BRACKET +301C ; Grapheme_Base # Pd WAVE DASH +301D ; Grapheme_Base # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Grapheme_Base # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Grapheme_Base # So POSTAL MARK FACE +3021..3029 ; Grapheme_Base # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3030 ; Grapheme_Base # Pd WAVY DASH +3031..3035 ; Grapheme_Base # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037 ; Grapheme_Base # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +3038..303A ; Grapheme_Base # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Grapheme_Base # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; Grapheme_Base # Lo MASU MARK +303D ; Grapheme_Base # Po PART ALTERNATION MARK +303E..303F ; Grapheme_Base # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +3041..3096 ; Grapheme_Base # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309B..309C ; Grapheme_Base # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; Grapheme_Base # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Grapheme_Base # Lo HIRAGANA DIGRAPH YORI +30A0 ; Grapheme_Base # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA ; Grapheme_Base # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; Grapheme_Base # Po KATAKANA MIDDLE DOT +30FC..30FE ; Grapheme_Base # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; 
Grapheme_Base # Lo KATAKANA DIGRAPH KOTO +3105..312F ; Grapheme_Base # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; Grapheme_Base # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; Grapheme_Base # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Grapheme_Base # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31C0..31E5 ; Grapheme_Base # So [38] CJK STROKE T..CJK STROKE SZP +31EF ; Grapheme_Base # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION +31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; Grapheme_Base # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Grapheme_Base # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Grapheme_Base # So PARTNERSHIP SIGN +3251..325F ; Grapheme_Base # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +3260..327F ; Grapheme_Base # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL +3280..3289 ; Grapheme_Base # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; Grapheme_Base # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; Grapheme_Base # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..33FF ; Grapheme_Base # So [320] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL +3400..4DBF ; Grapheme_Base # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4DC0..4DFF ; Grapheme_Base # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +4E00..A014 ; 
Grapheme_Base # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; Grapheme_Base # Lm YI SYLLABLE WU +A016..A48C ; Grapheme_Base # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; Grapheme_Base # So [55] YI RADICAL QOT..YI RADICAL KE +A4D0..A4F7 ; Grapheme_Base # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Grapheme_Base # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; Grapheme_Base # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; Grapheme_Base # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Grapheme_Base # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; Grapheme_Base # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; Grapheme_Base # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; Grapheme_Base # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; Grapheme_Base # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; Grapheme_Base # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Grapheme_Base # Lo CYRILLIC LETTER MULTIOCULAR O +A673 ; Grapheme_Base # Po SLAVONIC ASTERISK +A67E ; Grapheme_Base # Po CYRILLIC KAVYKA +A67F ; Grapheme_Base # Lm CYRILLIC PAYEROK +A680..A69B ; Grapheme_Base # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Grapheme_Base # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; Grapheme_Base # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Grapheme_Base # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F2..A6F7 ; Grapheme_Base # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A700..A716 ; Grapheme_Base # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Grapheme_Base # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Grapheme_Base # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A722..A76F 
; Grapheme_Base # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Grapheme_Base # Lm MODIFIER LETTER US +A771..A787 ; Grapheme_Base # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; Grapheme_Base # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; Grapheme_Base # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CD ; Grapheme_Base # L& [62] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D0..A7D1 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Grapheme_Base # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7DC ; Grapheme_Base # L& [8] LATIN SMALL LETTER DOUBLE WYNN..LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F2..A7F4 ; Grapheme_Base # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; Grapheme_Base # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; Grapheme_Base # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Grapheme_Base # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; Grapheme_Base # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; Grapheme_Base # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; Grapheme_Base # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; Grapheme_Base # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Grapheme_Base # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; Grapheme_Base # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; Grapheme_Base # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A830..A835 ; 
Grapheme_Base # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Grapheme_Base # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Grapheme_Base # Sc NORTH INDIC RUPEE MARK +A839 ; Grapheme_Base # So NORTH INDIC QUANTITY MARK +A840..A873 ; Grapheme_Base # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; Grapheme_Base # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +A880..A881 ; Grapheme_Base # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Grapheme_Base # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Grapheme_Base # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8CE..A8CF ; Grapheme_Base # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; Grapheme_Base # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8F2..A8F7 ; Grapheme_Base # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; Grapheme_Base # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; Grapheme_Base # Lo DEVANAGARI HEADSTROKE +A8FC ; Grapheme_Base # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; Grapheme_Base # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A900..A909 ; Grapheme_Base # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; Grapheme_Base # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A92E..A92F ; Grapheme_Base # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; Grapheme_Base # Lo [23] REJANG LETTER KA..REJANG LETTER A +A952 ; Grapheme_Base # Mc REJANG CONSONANT SIGN H +A95F ; Grapheme_Base # Po REJANG SECTION MARK +A960..A97C ; Grapheme_Base # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A983 ; Grapheme_Base # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Grapheme_Base # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; Grapheme_Base # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; 
Grapheme_Base # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9BF ; Grapheme_Base # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +A9C1..A9CD ; Grapheme_Base # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; Grapheme_Base # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; Grapheme_Base # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; Grapheme_Base # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; Grapheme_Base # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; Grapheme_Base # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; Grapheme_Base # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; Grapheme_Base # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; Grapheme_Base # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; Grapheme_Base # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA2F..AA30 ; Grapheme_Base # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; Grapheme_Base # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA40..AA42 ; Grapheme_Base # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; Grapheme_Base # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4D ; Grapheme_Base # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; Grapheme_Base # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; Grapheme_Base # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; Grapheme_Base # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Grapheme_Base # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Grapheme_Base # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; Grapheme_Base # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; Grapheme_Base # Lo MYANMAR LETTER AITON RA +AA7B ; Grapheme_Base # Mc MYANMAR SIGN PAO KAREN TONE +AA7D 
; Grapheme_Base # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; Grapheme_Base # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; Grapheme_Base # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; Grapheme_Base # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; Grapheme_Base # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; Grapheme_Base # Lo TAI VIET TONE MAI NUENG +AAC2 ; Grapheme_Base # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Grapheme_Base # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Grapheme_Base # Lm TAI VIET SYMBOL SAM +AADE..AADF ; Grapheme_Base # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; Grapheme_Base # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Grapheme_Base # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Grapheme_Base # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Grapheme_Base # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; Grapheme_Base # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; Grapheme_Base # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Grapheme_Base # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Grapheme_Base # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; 
Grapheme_Base # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; Grapheme_Base # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB70..ABBF ; Grapheme_Base # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; Grapheme_Base # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; Grapheme_Base # Po MEETEI MAYEK CHEIKHEI +ABEC ; Grapheme_Base # Mc MEETEI MAYEK LUM IYEK +ABF0..ABF9 ; Grapheme_Base # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; Grapheme_Base # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Grapheme_Base # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Grapheme_Base # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; Grapheme_Base # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Grapheme_Base # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; Grapheme_Base # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Grapheme_Base # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; Grapheme_Base # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; Grapheme_Base # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; Grapheme_Base # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; Grapheme_Base # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Grapheme_Base # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Grapheme_Base # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Grapheme_Base # Lo [2] HEBREW LETTER 
NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Grapheme_Base # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; Grapheme_Base # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; Grapheme_Base # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D ; Grapheme_Base # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD3E ; Grapheme_Base # Pe ORNATE LEFT PARENTHESIS +FD3F ; Grapheme_Base # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; Grapheme_Base # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD50..FD8F ; Grapheme_Base # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Grapheme_Base # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDCF ; Grapheme_Base # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDF0..FDFB ; Grapheme_Base # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; Grapheme_Base # Sc RIAL SIGN +FDFD..FDFF ; Grapheme_Base # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE10..FE16 ; Grapheme_Base # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; Grapheme_Base # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; Grapheme_Base # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; Grapheme_Base # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; Grapheme_Base # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL 
WAVY LOW LINE +FE35 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; Grapheme_Base # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; Grapheme_Base # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; Grapheme_Base # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; Grapheme_Base # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Grapheme_Base # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; Grapheme_Base # Pd SMALL EM DASH +FE59 ; Grapheme_Base # Ps SMALL LEFT PARENTHESIS +FE5A ; Grapheme_Base # Pe SMALL RIGHT PARENTHESIS +FE5B ; Grapheme_Base # Ps 
SMALL LEFT CURLY BRACKET +FE5C ; Grapheme_Base # Pe SMALL RIGHT CURLY BRACKET +FE5D ; Grapheme_Base # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; Grapheme_Base # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; Grapheme_Base # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; Grapheme_Base # Sm SMALL PLUS SIGN +FE63 ; Grapheme_Base # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Grapheme_Base # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Grapheme_Base # Po SMALL REVERSE SOLIDUS +FE69 ; Grapheme_Base # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; Grapheme_Base # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FE70..FE74 ; Grapheme_Base # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Grapheme_Base # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF01..FF03 ; Grapheme_Base # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; Grapheme_Base # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; Grapheme_Base # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; Grapheme_Base # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; Grapheme_Base # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; Grapheme_Base # Po FULLWIDTH ASTERISK +FF0B ; Grapheme_Base # Sm FULLWIDTH PLUS SIGN +FF0C ; Grapheme_Base # Po FULLWIDTH COMMA +FF0D ; Grapheme_Base # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; Grapheme_Base # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; Grapheme_Base # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; Grapheme_Base # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; Grapheme_Base # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; Grapheme_Base # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF21..FF3A ; Grapheme_Base # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3B ; Grapheme_Base # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; Grapheme_Base # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; Grapheme_Base # Pe FULLWIDTH RIGHT 
SQUARE BRACKET +FF3E ; Grapheme_Base # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; Grapheme_Base # Pc FULLWIDTH LOW LINE +FF40 ; Grapheme_Base # Sk FULLWIDTH GRAVE ACCENT +FF41..FF5A ; Grapheme_Base # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF5B ; Grapheme_Base # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; Grapheme_Base # Sm FULLWIDTH VERTICAL LINE +FF5D ; Grapheme_Base # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; Grapheme_Base # Sm FULLWIDTH TILDE +FF5F ; Grapheme_Base # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; Grapheme_Base # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; Grapheme_Base # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; Grapheme_Base # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Grapheme_Base # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Grapheme_Base # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F ; Grapheme_Base # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; Grapheme_Base # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; Grapheme_Base # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FFA0..FFBE ; Grapheme_Base # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Grapheme_Base # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Grapheme_Base # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Grapheme_Base # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Grapheme_Base # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +FFE0..FFE1 ; Grapheme_Base # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; Grapheme_Base # Sm FULLWIDTH NOT SIGN +FFE3 ; Grapheme_Base # Sk FULLWIDTH MACRON +FFE4 ; Grapheme_Base # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; Grapheme_Base # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; Grapheme_Base # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; Grapheme_Base # Sm [4] HALFWIDTH 
LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; Grapheme_Base # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10000..1000B ; Grapheme_Base # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Grapheme_Base # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Grapheme_Base # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Grapheme_Base # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Grapheme_Base # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; Grapheme_Base # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Grapheme_Base # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100..10102 ; Grapheme_Base # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; Grapheme_Base # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Grapheme_Base # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10140..10174 ; Grapheme_Base # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; Grapheme_Base # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; Grapheme_Base # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; Grapheme_Base # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C..1018E ; Grapheme_Base # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN +10190..1019C ; Grapheme_Base # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; Grapheme_Base # So GREEK SYMBOL TAU RHO +101D0..101FC ; Grapheme_Base # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +10280..1029C ; Grapheme_Base # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; Grapheme_Base # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E1..102FB ; Grapheme_Base # No [27] COPTIC EPACT DIGIT 
ONE..COPTIC EPACT NUMBER NINE HUNDRED +10300..1031F ; Grapheme_Base # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; Grapheme_Base # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..10340 ; Grapheme_Base # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; Grapheme_Base # Nl GOTHIC LETTER NINETY +10342..10349 ; Grapheme_Base # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Grapheme_Base # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; Grapheme_Base # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; Grapheme_Base # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; Grapheme_Base # Po UGARITIC WORD DIVIDER +103A0..103C3 ; Grapheme_Base # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Grapheme_Base # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; Grapheme_Base # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; Grapheme_Base # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; Grapheme_Base # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; Grapheme_Base # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; Grapheme_Base # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; Grapheme_Base # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Grapheme_Base # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; Grapheme_Base # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; Grapheme_Base # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; Grapheme_Base # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; Grapheme_Base # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Grapheme_Base # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Grapheme_Base # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Grapheme_Base # L& [2] 
VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Grapheme_Base # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Grapheme_Base # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Grapheme_Base # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Grapheme_Base # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; Grapheme_Base # Lo [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; Grapheme_Base # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; Grapheme_Base # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; Grapheme_Base # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; Grapheme_Base # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Grapheme_Base # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Grapheme_Base # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; Grapheme_Base # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Grapheme_Base # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Grapheme_Base # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Grapheme_Base # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Grapheme_Base # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; Grapheme_Base # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10857 ; Grapheme_Base # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; Grapheme_Base # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; Grapheme_Base # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; Grapheme_Base # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; Grapheme_Base # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; Grapheme_Base # Lo [31] NABATAEAN LETTER FINAL 
ALEPH..NABATAEAN LETTER TAW +108A7..108AF ; Grapheme_Base # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108E0..108F2 ; Grapheme_Base # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; Grapheme_Base # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF ; Grapheme_Base # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; Grapheme_Base # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; Grapheme_Base # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; Grapheme_Base # Po PHOENICIAN WORD SEPARATOR +10920..10939 ; Grapheme_Base # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; Grapheme_Base # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; Grapheme_Base # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD ; Grapheme_Base # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; Grapheme_Base # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; Grapheme_Base # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; Grapheme_Base # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; Grapheme_Base # Lo KHAROSHTHI LETTER A +10A10..10A13 ; Grapheme_Base # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Grapheme_Base # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; Grapheme_Base # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A40..10A48 ; Grapheme_Base # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A50..10A58 ; Grapheme_Base # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A60..10A7C ; Grapheme_Base # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; Grapheme_Base # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; Grapheme_Base # Po OLD SOUTH ARABIAN NUMERIC INDICATOR 
+10A80..10A9C ; Grapheme_Base # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; Grapheme_Base # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AC0..10AC7 ; Grapheme_Base # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; Grapheme_Base # So MANICHAEAN SIGN UD +10AC9..10AE4 ; Grapheme_Base # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AEB..10AEF ; Grapheme_Base # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; Grapheme_Base # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B00..10B35 ; Grapheme_Base # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F ; Grapheme_Base # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B40..10B55 ; Grapheme_Base # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; Grapheme_Base # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; Grapheme_Base # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; Grapheme_Base # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; Grapheme_Base # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C ; Grapheme_Base # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF ; Grapheme_Base # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10C00..10C48 ; Grapheme_Base # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; Grapheme_Base # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Grapheme_Base # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF ; Grapheme_Base # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D00..10D23 ; Grapheme_Base # Lo [36] 
HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D30..10D39 ; Grapheme_Base # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Grapheme_Base # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; Grapheme_Base # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; Grapheme_Base # Lm GARAY VOWEL LENGTH MARK +10D4F ; Grapheme_Base # Lo GARAY SUKUN +10D50..10D65 ; Grapheme_Base # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6E ; Grapheme_Base # Pd GARAY HYPHEN +10D6F ; Grapheme_Base # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; Grapheme_Base # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; Grapheme_Base # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN +10E60..10E7E ; Grapheme_Base # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10E80..10EA9 ; Grapheme_Base # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Grapheme_Base # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; Grapheme_Base # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; Grapheme_Base # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F51..10F54 ; Grapheme_Base # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; Grapheme_Base # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F70..10F81 ; Grapheme_Base # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F86..10F89 ; Grapheme_Base # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10FB0..10FC4 ; Grapheme_Base # Lo [21] 
CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; Grapheme_Base # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FE0..10FF6 ; Grapheme_Base # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; Grapheme_Base # Mc BRAHMI SIGN CANDRABINDU +11002 ; Grapheme_Base # Mc BRAHMI SIGN VISARGA +11003..11037 ; Grapheme_Base # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11047..1104D ; Grapheme_Base # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11052..11065 ; Grapheme_Base # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11066..1106F ; Grapheme_Base # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11071..11072 ; Grapheme_Base # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; Grapheme_Base # Lo BRAHMI LETTER OLD TAMIL LLA +11082 ; Grapheme_Base # Mc KAITHI SIGN VISARGA +11083..110AF ; Grapheme_Base # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Grapheme_Base # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; Grapheme_Base # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110BB..110BC ; Grapheme_Base # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BE..110C1 ; Grapheme_Base # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; Grapheme_Base # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Grapheme_Base # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; Grapheme_Base # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; Grapheme_Base # Mc CHAKMA VOWEL SIGN E +11136..1113F ; Grapheme_Base # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Grapheme_Base # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; Grapheme_Base # Lo CHAKMA LETTER LHAA +11145..11146 ; Grapheme_Base # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; Grapheme_Base # Lo CHAKMA LETTER VAA +11150..11172 ; Grapheme_Base # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA 
+11174..11175 ; Grapheme_Base # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; Grapheme_Base # Lo MAHAJANI LIGATURE SHRI +11182 ; Grapheme_Base # Mc SHARADA SIGN VISARGA +11183..111B2 ; Grapheme_Base # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Grapheme_Base # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF ; Grapheme_Base # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; Grapheme_Base # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Grapheme_Base # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111CD ; Grapheme_Base # Po SHARADA SUTRA MARK +111CE ; Grapheme_Base # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111D0..111D9 ; Grapheme_Base # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; Grapheme_Base # Lo SHARADA EKAM +111DB ; Grapheme_Base # Po SHARADA SIGN SIDDHAM +111DC ; Grapheme_Base # Lo SHARADA HEADSTROKE +111DD..111DF ; Grapheme_Base # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; Grapheme_Base # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; Grapheme_Base # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; Grapheme_Base # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; Grapheme_Base # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; Grapheme_Base # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11238..1123D ; Grapheme_Base # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123F..11240 ; Grapheme_Base # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11280..11286 ; Grapheme_Base # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; Grapheme_Base # Lo MULTANI LETTER GHA +1128A..1128D ; Grapheme_Base # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; Grapheme_Base # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; Grapheme_Base # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; Grapheme_Base # Po MULTANI SECTION MARK +112B0..112DE ; Grapheme_Base # Lo [47] KHUDAWADI LETTER 
A..KHUDAWADI LETTER HA +112E0..112E2 ; Grapheme_Base # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112F0..112F9 ; Grapheme_Base # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11302..11303 ; Grapheme_Base # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; Grapheme_Base # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; Grapheme_Base # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; Grapheme_Base # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; Grapheme_Base # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; Grapheme_Base # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; Grapheme_Base # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; Grapheme_Base # Lo GRANTHA SIGN AVAGRAHA +1133F ; Grapheme_Base # Mc GRANTHA VOWEL SIGN I +11341..11344 ; Grapheme_Base # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Grapheme_Base # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Grapheme_Base # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11350 ; Grapheme_Base # Lo GRANTHA OM +1135D..11361 ; Grapheme_Base # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; Grapheme_Base # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11380..11389 ; Grapheme_Base # Lo [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Grapheme_Base # Lo TULU-TIGALARI LETTER EE +1138E ; Grapheme_Base # Lo TULU-TIGALARI LETTER AI +11390..113B5 ; Grapheme_Base # Lo [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Grapheme_Base # Lo TULU-TIGALARI SIGN AVAGRAHA +113B9..113BA ; Grapheme_Base # Mc [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II +113CA ; Grapheme_Base # Mc TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Grapheme_Base # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +113D1 ; Grapheme_Base # Lo TULU-TIGALARI REPHA +113D3 ; Grapheme_Base # Lo 
TULU-TIGALARI SIGN PLUTA +113D4..113D5 ; Grapheme_Base # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Grapheme_Base # Po [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +11400..11434 ; Grapheme_Base # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; Grapheme_Base # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; Grapheme_Base # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; Grapheme_Base # Mc NEWA SIGN VISARGA +11447..1144A ; Grapheme_Base # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; Grapheme_Base # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; Grapheme_Base # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; Grapheme_Base # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; Grapheme_Base # Po NEWA INSERTION SIGN +1145F..11461 ; Grapheme_Base # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; Grapheme_Base # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B1..114B2 ; Grapheme_Base # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II +114B9 ; Grapheme_Base # Mc TIRHUTA VOWEL SIGN E +114BB..114BC ; Grapheme_Base # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O +114BE ; Grapheme_Base # Mc TIRHUTA VOWEL SIGN AU +114C1 ; Grapheme_Base # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; Grapheme_Base # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; Grapheme_Base # Po TIRHUTA ABBREVIATION SIGN +114C7 ; Grapheme_Base # Lo TIRHUTA OM +114D0..114D9 ; Grapheme_Base # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; Grapheme_Base # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115B0..115B1 ; Grapheme_Base # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II +115B8..115BB ; Grapheme_Base # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; Grapheme_Base # Mc SIDDHAM SIGN VISARGA +115C1..115D7 ; Grapheme_Base # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; Grapheme_Base # Lo 
[4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; Grapheme_Base # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; Grapheme_Base # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; Grapheme_Base # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; Grapheme_Base # Mc MODI SIGN VISARGA +11641..11643 ; Grapheme_Base # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; Grapheme_Base # Lo MODI SIGN HUVA +11650..11659 ; Grapheme_Base # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11660..1166C ; Grapheme_Base # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11680..116AA ; Grapheme_Base # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; Grapheme_Base # Mc TAKRI SIGN VISARGA +116AE..116AF ; Grapheme_Base # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B8 ; Grapheme_Base # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; Grapheme_Base # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; Grapheme_Base # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Grapheme_Base # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11700..1171A ; Grapheme_Base # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171E ; Grapheme_Base # Mc AHOM CONSONANT SIGN MEDIAL RA +11720..11721 ; Grapheme_Base # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11726 ; Grapheme_Base # Mc AHOM VOWEL SIGN E +11730..11739 ; Grapheme_Base # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; Grapheme_Base # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; Grapheme_Base # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; Grapheme_Base # So AHOM SYMBOL VI +11740..11746 ; Grapheme_Base # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; Grapheme_Base # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; Grapheme_Base # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; Grapheme_Base # Mc DOGRA SIGN VISARGA +1183B ; Grapheme_Base # Po DOGRA ABBREVIATION SIGN 
+118A0..118DF ; Grapheme_Base # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; Grapheme_Base # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; Grapheme_Base # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF..11906 ; Grapheme_Base # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; Grapheme_Base # Lo DIVES AKURU LETTER O +1190C..11913 ; Grapheme_Base # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; Grapheme_Base # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; Grapheme_Base # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11931..11935 ; Grapheme_Base # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E +11937..11938 ; Grapheme_Base # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193F ; Grapheme_Base # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; Grapheme_Base # Mc DIVES AKURU MEDIAL YA +11941 ; Grapheme_Base # Lo DIVES AKURU INITIAL RA +11942 ; Grapheme_Base # Mc DIVES AKURU MEDIAL RA +11944..11946 ; Grapheme_Base # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; Grapheme_Base # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; Grapheme_Base # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; Grapheme_Base # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; Grapheme_Base # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; Grapheme_Base # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; Grapheme_Base # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; Grapheme_Base # Po NANDINAGARI SIGN SIDDHAM +119E3 ; Grapheme_Base # Lo NANDINAGARI HEADSTROKE +119E4 ; Grapheme_Base # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; Grapheme_Base # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; Grapheme_Base # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A39 ; Grapheme_Base # Mc 
ZANABAZAR SQUARE SIGN VISARGA +11A3A ; Grapheme_Base # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3F..11A46 ; Grapheme_Base # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A50 ; Grapheme_Base # Lo SOYOMBO LETTER A +11A57..11A58 ; Grapheme_Base # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A5C..11A89 ; Grapheme_Base # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A97 ; Grapheme_Base # Mc SOYOMBO SIGN VISARGA +11A9A..11A9C ; Grapheme_Base # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; Grapheme_Base # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; Grapheme_Base # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11AF8 ; Grapheme_Base # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11B00..11B09 ; Grapheme_Base # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; Grapheme_Base # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; Grapheme_Base # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; Grapheme_Base # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11C00..11C08 ; Grapheme_Base # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; Grapheme_Base # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; Grapheme_Base # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; Grapheme_Base # Mc BHAIKSUKI SIGN VISARGA +11C40 ; Grapheme_Base # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; Grapheme_Base # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; Grapheme_Base # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; Grapheme_Base # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; Grapheme_Base # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; Grapheme_Base # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11CA9 ; Grapheme_Base # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; Grapheme_Base # Mc MARCHEN VOWEL SIGN I +11CB4 ; Grapheme_Base # Mc 
MARCHEN VOWEL SIGN O +11D00..11D06 ; Grapheme_Base # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; Grapheme_Base # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; Grapheme_Base # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; Grapheme_Base # Lo MASARAM GONDI REPHA +11D50..11D59 ; Grapheme_Base # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; Grapheme_Base # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; Grapheme_Base # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; Grapheme_Base # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; Grapheme_Base # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; Grapheme_Base # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; Grapheme_Base # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; Grapheme_Base # Lo GUNJALA GONDI OM +11DA0..11DA9 ; Grapheme_Base # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; Grapheme_Base # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF5..11EF6 ; Grapheme_Base # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; Grapheme_Base # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F02 ; Grapheme_Base # Lo KAWI SIGN REPHA +11F03 ; Grapheme_Base # Mc KAWI SIGN VISARGA +11F04..11F10 ; Grapheme_Base # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; Grapheme_Base # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; Grapheme_Base # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; Grapheme_Base # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F43..11F4F ; Grapheme_Base # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11F50..11F59 ; Grapheme_Base # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11FB0 ; Grapheme_Base # Lo LISU LETTER YHA +11FC0..11FD4 ; Grapheme_Base # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL 
FRACTION DOWNSCALING FACTOR KIIZH +11FD5..11FDC ; Grapheme_Base # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FDD..11FE0 ; Grapheme_Base # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +11FE1..11FF1 ; Grapheme_Base # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +11FFF ; Grapheme_Base # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; Grapheme_Base # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; Grapheme_Base # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; Grapheme_Base # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; Grapheme_Base # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; Grapheme_Base # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; Grapheme_Base # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342F ; Grapheme_Base # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; Grapheme_Base # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; Grapheme_Base # Lo [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; Grapheme_Base # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Grapheme_Base # Lo [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +1612A..1612C ; Grapheme_Base # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16130..16139 ; Grapheme_Base # Nd [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE +16800..16A38 ; Grapheme_Base # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; Grapheme_Base # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; Grapheme_Base # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; Grapheme_Base # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; Grapheme_Base # Lo [79] TANGSA LETTER 
OZ..TANGSA LETTER ZA +16AC0..16AC9 ; Grapheme_Base # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; Grapheme_Base # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF5 ; Grapheme_Base # Po BASSA VAH FULL STOP +16B00..16B2F ; Grapheme_Base # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B37..16B3B ; Grapheme_Base # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; Grapheme_Base # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; Grapheme_Base # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; Grapheme_Base # Po PAHAWH HMONG SIGN XAUS +16B45 ; Grapheme_Base # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; Grapheme_Base # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; Grapheme_Base # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; Grapheme_Base # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; Grapheme_Base # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D42 ; Grapheme_Base # Lm [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D43..16D6A ; Grapheme_Base # Lo [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16D6B..16D6C ; Grapheme_Base # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16D6D..16D6F ; Grapheme_Base # Po [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA +16D70..16D79 ; Grapheme_Base # Nd [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +16E40..16E7F ; Grapheme_Base # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; Grapheme_Base # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; Grapheme_Base # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; Grapheme_Base # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; Grapheme_Base # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; Grapheme_Base # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL 
SIGN UI +16F93..16F9F ; Grapheme_Base # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Grapheme_Base # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK +16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK +17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; Grapheme_Base # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; Grapheme_Base # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; Grapheme_Base # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; Grapheme_Base # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; Grapheme_Base # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; Grapheme_Base # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; Grapheme_Base # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; Grapheme_Base # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Grapheme_Base # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Grapheme_Base # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Grapheme_Base # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; Grapheme_Base # So DUPLOYAN SIGN O WITH CROSS +1BC9F ; Grapheme_Base # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1CC00..1CCEF ; Grapheme_Base # So [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z +1CCF0..1CCF9 ; 
Grapheme_Base # Nd [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1CD00..1CEB3 ; Grapheme_Base # So [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CF50..1CFC3 ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; Grapheme_Base # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D16A..1D16C ; Grapheme_Base # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D183..1D184 ; Grapheme_Base # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; Grapheme_Base # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1EA ; Grapheme_Base # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D200..1D241 ; Grapheme_Base # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D245 ; Grapheme_Base # So GREEK MUSICAL LEIMMA +1D2C0..1D2D3 ; Grapheme_Base # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN +1D2E0..1D2F3 ; Grapheme_Base # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D300..1D356 ; Grapheme_Base # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D378 ; Grapheme_Base # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; Grapheme_Base # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Grapheme_Base # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Grapheme_Base # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Grapheme_Base # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Grapheme_Base # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Grapheme_Base # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL 
SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Grapheme_Base # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Grapheme_Base # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Grapheme_Base # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Grapheme_Base # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Grapheme_Base # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Grapheme_Base # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Grapheme_Base # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Grapheme_Base # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Grapheme_Base # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Grapheme_Base # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Grapheme_Base # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Grapheme_Base # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Grapheme_Base # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Grapheme_Base # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Grapheme_Base # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Grapheme_Base # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Grapheme_Base # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Grapheme_Base # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Grapheme_Base # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Grapheme_Base # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Grapheme_Base # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Grapheme_Base # L& 
[31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; Grapheme_Base # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Grapheme_Base # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Grapheme_Base # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Grapheme_Base # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Grapheme_Base # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Grapheme_Base # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Grapheme_Base # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Grapheme_Base # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Grapheme_Base # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1D800..1D9FF ; Grapheme_Base # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA37..1DA3A ; Grapheme_Base # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA6D..1DA74 ; Grapheme_Base # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA76..1DA83 ; Grapheme_Base # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA85..1DA86 ; Grapheme_Base # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; Grapheme_Base # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DF00..1DF09 ; Grapheme_Base # L& [10] LATIN 
SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Grapheme_Base # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; Grapheme_Base # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; Grapheme_Base # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; Grapheme_Base # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; Grapheme_Base # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Grapheme_Base # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2F0..1E2F9 ; Grapheme_Base # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E2FF ; Grapheme_Base # Sc WANCHO NGUN SIGN +1E4D0..1E4EA ; Grapheme_Base # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; Grapheme_Base # Lm NAG MUNDARI SIGN OJOD +1E4F0..1E4F9 ; Grapheme_Base # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5D0..1E5ED ; Grapheme_Base # Lo [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; Grapheme_Base # Lo OL ONAL SIGN HODDOND +1E5F1..1E5FA ; Grapheme_Base # Nd [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E5FF ; Grapheme_Base # Po OL ONAL ABBREVIATION SIGN +1E7E0..1E7E6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; Grapheme_Base # Lo [2] ETHIOPIC SYLLABLE GURAGE 
MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; Grapheme_Base # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; Grapheme_Base # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF ; Grapheme_Base # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E900..1E943 ; Grapheme_Base # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; Grapheme_Base # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; Grapheme_Base # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F ; Grapheme_Base # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1EC71..1ECAB ; Grapheme_Base # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; Grapheme_Base # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; Grapheme_Base # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; Grapheme_Base # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; Grapheme_Base # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; Grapheme_Base # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; Grapheme_Base # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; Grapheme_Base # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1EE00..1EE03 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Grapheme_Base # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Grapheme_Base # Lo ARABIC MATHEMATICAL 
INITIAL DAD +1EE3B ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Grapheme_Base # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Grapheme_Base # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL 
DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Grapheme_Base # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F02B ; Grapheme_Base # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; Grapheme_Base # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; Grapheme_Base # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; Grapheme_Base # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; Grapheme_Base # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; Grapheme_Base # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F100..1F10C ; Grapheme_Base # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F1AD ; Grapheme_Base # So [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL +1F1E6..1F202 ; Grapheme_Base # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA +1F210..1F23B ; Grapheme_Base # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; Grapheme_Base # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; Grapheme_Base # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265 ; Grapheme_Base # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; Grapheme_Base # So [251] CYCLONE..AMPHORA +1F3FB..1F3FF ; Grapheme_Base # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F6D7 ; Grapheme_Base # So [728] RAT..ELEVATOR +1F6DC..1F6EC ; Grapheme_Base # So [17] WIRELESS..AIRPLANE ARRIVING +1F6F0..1F6FC ; Grapheme_Base # So [13] SATELLITE..ROLLER SKATE +1F700..1F776 ; Grapheme_Base # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F7D9 ; 
Grapheme_Base # So [95] HAUMEA..NINE POINTED WHITE STAR +1F7E0..1F7EB ; Grapheme_Base # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; Grapheme_Base # So HEAVY EQUALS SIGN +1F800..1F80B ; Grapheme_Base # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; Grapheme_Base # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; Grapheme_Base # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; Grapheme_Base # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; Grapheme_Base # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8BB ; Grapheme_Base # So [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; Grapheme_Base # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F900..1FA53 ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; Grapheme_Base # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA7C ; Grapheme_Base # So [13] BALLET SHOES..CRUTCH +1FA80..1FA89 ; Grapheme_Base # So [10] YO-YO..HARP +1FA8F..1FAC6 ; Grapheme_Base # So [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; Grapheme_Base # So [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; Grapheme_Base # So [11] SPLATTER..FACE WITH BAGS UNDER EYES +1FAF0..1FAF8 ; Grapheme_Base # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND +1FB00..1FB92 ; Grapheme_Base # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBEF ; Grapheme_Base # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE +1FBF0..1FBF9 ; Grapheme_Base # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; Grapheme_Base # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED 
IDEOGRAPH-2A6DF +2A700..2B739 ; Grapheme_Base # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Grapheme_Base # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Grapheme_Base # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 152730 + +# ================================================ + +# Derived Property: Grapheme_Link (deprecated) +# Generated from: Canonical_Combining_Class=Virama +# Use Canonical_Combining_Class=Virama directly instead + +094D ; Grapheme_Link # Mn DEVANAGARI SIGN VIRAMA +09CD ; Grapheme_Link # Mn BENGALI SIGN VIRAMA +0A4D ; Grapheme_Link # Mn GURMUKHI SIGN VIRAMA +0ACD ; Grapheme_Link # Mn GUJARATI SIGN VIRAMA +0B4D ; Grapheme_Link # Mn ORIYA SIGN VIRAMA +0BCD ; Grapheme_Link # Mn TAMIL SIGN VIRAMA +0C4D ; Grapheme_Link # Mn TELUGU SIGN VIRAMA +0CCD ; Grapheme_Link # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Grapheme_Link # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D4D ; Grapheme_Link # Mn MALAYALAM SIGN VIRAMA +0DCA ; Grapheme_Link # Mn SINHALA SIGN AL-LAKUNA +0E3A ; Grapheme_Link # Mn THAI CHARACTER PHINTHU +0EBA ; Grapheme_Link # Mn LAO SIGN PALI VIRAMA +0F84 ; Grapheme_Link # Mn TIBETAN MARK HALANTA +1039..103A ; Grapheme_Link # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1714 ; Grapheme_Link # Mn TAGALOG SIGN VIRAMA +1715 ; Grapheme_Link # Mc TAGALOG SIGN PAMUDPOD +1734 ; Grapheme_Link # Mc HANUNOO SIGN 
PAMUDPOD +17D2 ; Grapheme_Link # Mn KHMER SIGN COENG +1A60 ; Grapheme_Link # Mn TAI THAM SIGN SAKOT +1B44 ; Grapheme_Link # Mc BALINESE ADEG ADEG +1BAA ; Grapheme_Link # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Grapheme_Link # Mn SUNDANESE SIGN VIRAMA +1BF2..1BF3 ; Grapheme_Link # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +2D7F ; Grapheme_Link # Mn TIFINAGH CONSONANT JOINER +A806 ; Grapheme_Link # Mn SYLOTI NAGRI SIGN HASANTA +A82C ; Grapheme_Link # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4 ; Grapheme_Link # Mn SAURASHTRA SIGN VIRAMA +A953 ; Grapheme_Link # Mc REJANG VIRAMA +A9C0 ; Grapheme_Link # Mc JAVANESE PANGKON +AAF6 ; Grapheme_Link # Mn MEETEI MAYEK VIRAMA +ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK +10A3F ; Grapheme_Link # Mn KHAROSHTHI VIRAMA +11046 ; Grapheme_Link # Mn BRAHMI VIRAMA +11070 ; Grapheme_Link # Mn BRAHMI SIGN OLD TAMIL VIRAMA +1107F ; Grapheme_Link # Mn BRAHMI NUMBER JOINER +110B9 ; Grapheme_Link # Mn KAITHI SIGN VIRAMA +11133..11134 ; Grapheme_Link # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Grapheme_Link # Mc SHARADA SIGN VIRAMA +11235 ; Grapheme_Link # Mc KHOJKI SIGN VIRAMA +112EA ; Grapheme_Link # Mn KHUDAWADI SIGN VIRAMA +1134D ; Grapheme_Link # Mc GRANTHA SIGN VIRAMA +113CE ; Grapheme_Link # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Grapheme_Link # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Grapheme_Link # Mn TULU-TIGALARI CONJOINER +11442 ; Grapheme_Link # Mn NEWA SIGN VIRAMA +114C2 ; Grapheme_Link # Mn TIRHUTA SIGN VIRAMA +115BF ; Grapheme_Link # Mn SIDDHAM SIGN VIRAMA +1163F ; Grapheme_Link # Mn MODI SIGN VIRAMA +116B6 ; Grapheme_Link # Mc TAKRI SIGN VIRAMA +1172B ; Grapheme_Link # Mn AHOM SIGN KILLER +11839 ; Grapheme_Link # Mn DOGRA SIGN VIRAMA +1193D ; Grapheme_Link # Mc DIVES AKURU SIGN HALANTA +1193E ; Grapheme_Link # Mn DIVES AKURU VIRAMA +119E0 ; Grapheme_Link # Mn NANDINAGARI SIGN VIRAMA +11A34 ; Grapheme_Link # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Grapheme_Link # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Grapheme_Link # 
Mn SOYOMBO SUBJOINER +11C3F ; Grapheme_Link # Mn BHAIKSUKI SIGN VIRAMA +11D44..11D45 ; Grapheme_Link # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA +11D97 ; Grapheme_Link # Mn GUNJALA GONDI VIRAMA +11F41 ; Grapheme_Link # Mc KAWI SIGN KILLER +11F42 ; Grapheme_Link # Mn KAWI CONJOINER +1612F ; Grapheme_Link # Mn GURUNG KHEMA SIGN THOLHOMA + +# Total code points: 69 + +# ================================================ + +# Derived Property: Indic_Conjunct_Break +# Generated from the Grapheme_Cluster_Break, Indic_Syllabic_Category, +# Canonical_Combining_Class, and Script properties as described in UAX #44: +# https://www.unicode.org/reports/tr44/. + +# All code points not explicitly listed for Indic_Conjunct_Break +# have the value None. + +# @missing: 0000..10FFFF; InCB; None + +# ================================================ + +# Indic_Conjunct_Break=Linker + +094D ; InCB; Linker # Mn DEVANAGARI SIGN VIRAMA +09CD ; InCB; Linker # Mn BENGALI SIGN VIRAMA +0ACD ; InCB; Linker # Mn GUJARATI SIGN VIRAMA +0B4D ; InCB; Linker # Mn ORIYA SIGN VIRAMA +0C4D ; InCB; Linker # Mn TELUGU SIGN VIRAMA +0D4D ; InCB; Linker # Mn MALAYALAM SIGN VIRAMA + +# Total code points: 6 + +# ================================================ + +# Indic_Conjunct_Break=Consonant + +0915..0939 ; InCB; Consonant # Lo [37] DEVANAGARI LETTER KA..DEVANAGARI LETTER HA +0958..095F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA +0978..097F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER MARWARI DDA..DEVANAGARI LETTER BBA +0995..09A8 ; InCB; Consonant # Lo [20] BENGALI LETTER KA..BENGALI LETTER NA +09AA..09B0 ; InCB; Consonant # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; InCB; Consonant # Lo BENGALI LETTER LA +09B6..09B9 ; InCB; Consonant # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09DC..09DD ; InCB; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF ; InCB; Consonant # Lo BENGALI LETTER YYA +09F0..09F1 ; InCB; Consonant # Lo [2] BENGALI LETTER 
RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +0A95..0AA8 ; InCB; Consonant # Lo [20] GUJARATI LETTER KA..GUJARATI LETTER NA +0AAA..0AB0 ; InCB; Consonant # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; InCB; Consonant # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; InCB; Consonant # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0AF9 ; InCB; Consonant # Lo GUJARATI LETTER ZHA +0B15..0B28 ; InCB; Consonant # Lo [20] ORIYA LETTER KA..ORIYA LETTER NA +0B2A..0B30 ; InCB; Consonant # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; InCB; Consonant # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; InCB; Consonant # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B5C..0B5D ; InCB; Consonant # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F ; InCB; Consonant # Lo ORIYA LETTER YYA +0B71 ; InCB; Consonant # Lo ORIYA LETTER WA +0C15..0C28 ; InCB; Consonant # Lo [20] TELUGU LETTER KA..TELUGU LETTER NA +0C2A..0C39 ; InCB; Consonant # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C58..0C5A ; InCB; Consonant # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0D15..0D3A ; InCB; Consonant # Lo [38] MALAYALAM LETTER KA..MALAYALAM LETTER TTTA + +# Total code points: 240 + +# ================================================ + +# Indic_Conjunct_Break=Extend + +0300..036F ; InCB; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; InCB; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; InCB; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; InCB; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; InCB; Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; InCB; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; InCB; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; InCB; Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; InCB; Extend # Mn [11] ARABIC SIGN SALLALLAHOU 
ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; InCB; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; InCB; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; InCB; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; InCB; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; InCB; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; InCB; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; InCB; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; InCB; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; InCB; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; InCB; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; InCB; Extend # Mn NKO DANTAYALAN +0816..0819 ; InCB; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; InCB; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; InCB; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; InCB; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; InCB; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0897..089F ; InCB; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; InCB; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; InCB; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; InCB; Extend # Mn DEVANAGARI VOWEL SIGN OE +093C ; InCB; Extend # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; InCB; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0951..0957 ; InCB; Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; InCB; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; InCB; Extend # Mn BENGALI SIGN 
CANDRABINDU +09BC ; InCB; Extend # Mn BENGALI SIGN NUKTA +09BE ; InCB; Extend # Mc BENGALI VOWEL SIGN AA +09C1..09C4 ; InCB; Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09D7 ; InCB; Extend # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; InCB; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; InCB; Extend # Mn BENGALI SANDHI MARK +0A01..0A02 ; InCB; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; InCB; Extend # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; InCB; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; InCB; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; InCB; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; InCB; Extend # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; InCB; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; InCB; Extend # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; InCB; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; InCB; Extend # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; InCB; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; InCB; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AE2..0AE3 ; InCB; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; InCB; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; InCB; Extend # Mn ORIYA SIGN CANDRABINDU +0B3C ; InCB; Extend # Mn ORIYA SIGN NUKTA +0B3E ; InCB; Extend # Mc ORIYA VOWEL SIGN AA +0B3F ; InCB; Extend # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; InCB; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B55..0B56 ; InCB; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; InCB; Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; InCB; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; InCB; Extend # Mn TAMIL SIGN ANUSVARA +0BBE ; InCB; Extend # Mc TAMIL VOWEL SIGN AA +0BC0 ; 
InCB; Extend # Mn TAMIL VOWEL SIGN II +0BCD ; InCB; Extend # Mn TAMIL SIGN VIRAMA +0BD7 ; InCB; Extend # Mc TAMIL AU LENGTH MARK +0C00 ; InCB; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; InCB; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; InCB; Extend # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; InCB; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; InCB; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; InCB; Extend # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; InCB; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; InCB; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; InCB; Extend # Mn KANNADA SIGN CANDRABINDU +0CBC ; InCB; Extend # Mn KANNADA SIGN NUKTA +0CBF ; InCB; Extend # Mn KANNADA VOWEL SIGN I +0CC0 ; InCB; Extend # Mc KANNADA VOWEL SIGN II +0CC2 ; InCB; Extend # Mc KANNADA VOWEL SIGN UU +0CC6 ; InCB; Extend # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; InCB; Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; InCB; Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; InCB; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; InCB; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; InCB; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; InCB; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; InCB; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3E ; InCB; Extend # Mc MALAYALAM VOWEL SIGN AA +0D41..0D44 ; InCB; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D57 ; InCB; Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; InCB; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; InCB; Extend # Mn SINHALA SIGN CANDRABINDU +0DCA ; InCB; Extend # Mn SINHALA 
SIGN AL-LAKUNA +0DCF ; InCB; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DD2..0DD4 ; InCB; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; InCB; Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DDF ; InCB; Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +0E31 ; InCB; Extend # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; InCB; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; InCB; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; InCB; Extend # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; InCB; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; InCB; Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; InCB; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; InCB; Extend # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; InCB; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; InCB; Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; InCB; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; InCB; Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; InCB; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; InCB; Extend # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; InCB; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; InCB; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; InCB; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; InCB; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; InCB; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; InCB; Extend # Mn [3] MYANMAR CONSONANT 
SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; InCB; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; InCB; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; InCB; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; InCB; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; InCB; Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; InCB; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; InCB; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; InCB; Extend # Mc TAGALOG SIGN PAMUDPOD +1732..1733 ; InCB; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; InCB; Extend # Mc HANUNOO SIGN PAMUDPOD +1752..1753 ; InCB; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; InCB; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; InCB; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; InCB; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; InCB; Extend # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; InCB; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; InCB; Extend # Mn KHMER SIGN ATTHACAN +180B..180D ; InCB; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; InCB; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; InCB; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; InCB; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; InCB; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; InCB; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; InCB; Extend # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; InCB; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; InCB; Extend # Mn [2] BUGINESE VOWEL SIGN 
I..BUGINESE VOWEL SIGN U +1A1B ; InCB; Extend # Mn BUGINESE VOWEL SIGN AE +1A56 ; InCB; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; InCB; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; InCB; Extend # Mn TAI THAM SIGN SAKOT +1A62 ; InCB; Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; InCB; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; InCB; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; InCB; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; InCB; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; InCB; Extend # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; InCB; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; InCB; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; InCB; Extend # Mn BALINESE SIGN REREKAN +1B35 ; InCB; Extend # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; InCB; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; InCB; Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; InCB; Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B3D ; InCB; Extend # Mc BALINESE VOWEL SIGN LA LENGA TEDUNG +1B42 ; InCB; Extend # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; InCB; Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B6B..1B73 ; InCB; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; InCB; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; InCB; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; InCB; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; InCB; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; InCB; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; InCB; 
Extend # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; InCB; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; InCB; Extend # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; InCB; Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; InCB; Extend # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C2C..1C33 ; InCB; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; InCB; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; InCB; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; InCB; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; InCB; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; InCB; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; InCB; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; InCB; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; InCB; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200D ; InCB; Extend # Cf ZERO WIDTH JOINER +20D0..20DC ; InCB; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; InCB; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; InCB; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; InCB; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; InCB; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; InCB; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; InCB; Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; InCB; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; InCB; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; InCB; Extend # Mc [2] HANGUL SINGLE DOT TONE 
MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; InCB; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; InCB; Extend # Mn COMBINING CYRILLIC VZMET +A670..A672 ; InCB; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; InCB; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; InCB; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; InCB; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; InCB; Extend # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; InCB; Extend # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; InCB; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; InCB; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; InCB; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; InCB; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; InCB; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; InCB; Extend # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; InCB; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; InCB; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A953 ; InCB; Extend # Mc REJANG VIRAMA +A980..A982 ; InCB; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; InCB; Extend # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; InCB; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; InCB; Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9C0 ; InCB; Extend # Mc JAVANESE PANGKON +A9E5 ; InCB; Extend # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; InCB; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; InCB; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; InCB; Extend # Mn [2] CHAM 
CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; InCB; Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; InCB; Extend # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; InCB; Extend # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; InCB; Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; InCB; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; InCB; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; InCB; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; InCB; Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; InCB; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; InCB; Extend # Mn MEETEI MAYEK VIRAMA +ABE5 ; InCB; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; InCB; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; InCB; Extend # Mn MEETEI MAYEK APUN IYEK +FB1E ; InCB; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; InCB; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; InCB; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; InCB; Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; InCB; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; InCB; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; InCB; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; InCB; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; InCB; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; InCB; Extend # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; InCB; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI 
ROHINGYA SIGN TASSI +10D69..10D6D ; InCB; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC..10EFF ; InCB; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; InCB; Extend # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; InCB; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; InCB; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; InCB; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; InCB; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; InCB; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; InCB; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; InCB; Extend # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; InCB; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; InCB; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; InCB; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; InCB; Extend # Mn MAHAJANI SIGN NUKTA +11180..11181 ; InCB; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; InCB; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C0 ; InCB; Extend # Mc SHARADA SIGN VIRAMA +111C9..111CC ; InCB; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; InCB; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; InCB; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; InCB; Extend # Mn KHOJKI SIGN ANUSVARA +11235 ; InCB; Extend # Mc KHOJKI SIGN VIRAMA +11236..11237 ; InCB; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; 
InCB; Extend # Mn KHOJKI SIGN SUKUN +11241 ; InCB; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; InCB; Extend # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; InCB; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; InCB; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; InCB; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133E ; InCB; Extend # Mc GRANTHA VOWEL SIGN AA +11340 ; InCB; Extend # Mn GRANTHA VOWEL SIGN II +1134D ; InCB; Extend # Mc GRANTHA SIGN VIRAMA +11357 ; InCB; Extend # Mc GRANTHA AU LENGTH MARK +11366..1136C ; InCB; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; InCB; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8 ; InCB; Extend # Mc TULU-TIGALARI VOWEL SIGN AA +113BB..113C0 ; InCB; Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; InCB; Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; InCB; Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; InCB; Extend # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK +113CE ; InCB; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; InCB; Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; InCB; Extend # Mn TULU-TIGALARI CONJOINER +113D2 ; InCB; Extend # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; InCB; Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11438..1143F ; InCB; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; InCB; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; InCB; Extend # Mn NEWA SIGN NUKTA +1145E ; InCB; Extend # Mn NEWA SANDHI MARK +114B0 ; InCB; Extend # Mc TIRHUTA VOWEL SIGN AA +114B3..114B8 ; InCB; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; InCB; Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BD ; InCB; Extend # Mc TIRHUTA VOWEL SIGN SHORT O +114BF..114C0 ; InCB; Extend # 
Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; InCB; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF ; InCB; Extend # Mc SIDDHAM VOWEL SIGN AA +115B2..115B5 ; InCB; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; InCB; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; InCB; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; InCB; Extend # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; InCB; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; InCB; Extend # Mn MODI SIGN ANUSVARA +1163F..11640 ; InCB; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; InCB; Extend # Mn TAKRI SIGN ANUSVARA +116AD ; InCB; Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; InCB; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; InCB; Extend # Mc TAKRI SIGN VIRAMA +116B7 ; InCB; Extend # Mn TAKRI SIGN NUKTA +1171D ; InCB; Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; InCB; Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; InCB; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; InCB; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; InCB; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; InCB; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +11930 ; InCB; Extend # Mc DIVES AKURU VOWEL SIGN AA +1193B..1193C ; InCB; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; InCB; Extend # Mc DIVES AKURU SIGN HALANTA +1193E ; InCB; Extend # Mn DIVES AKURU VIRAMA +11943 ; InCB; Extend # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; InCB; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; InCB; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; InCB; Extend # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; InCB; Extend # Mn [10] 
ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; InCB; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; InCB; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; InCB; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; InCB; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; InCB; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; InCB; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; InCB; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; InCB; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; InCB; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; InCB; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; InCB; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; InCB; Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; InCB; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; InCB; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; InCB; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; InCB; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; InCB; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; InCB; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; InCB; Extend # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; InCB; Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; InCB; Extend # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; InCB; Extend # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; InCB; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; InCB; Extend # Mn [2] KAWI SIGN 
CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; InCB; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; InCB; Extend # Mn KAWI VOWEL SIGN EU +11F41 ; InCB; Extend # Mc KAWI SIGN KILLER +11F42 ; InCB; Extend # Mn KAWI CONJOINER +11F5A ; InCB; Extend # Mn KAWI SIGN NUKTA +13440 ; InCB; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; InCB; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; InCB; Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; InCB; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; InCB; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; InCB; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; InCB; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; InCB; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; InCB; Extend # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; InCB; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1BC9D..1BC9E ; InCB; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; InCB; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; InCB; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; InCB; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; InCB; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; InCB; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; InCB; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; InCB; Extend # Mn [7] MUSICAL SYMBOL 
COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; InCB; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; InCB; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; InCB; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; InCB; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; InCB; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; InCB; Extend # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; InCB; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; InCB; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; InCB; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; InCB; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; InCB; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; InCB; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; InCB; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; InCB; Extend # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; InCB; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; InCB; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; InCB; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1F3FB..1F3FF ; InCB; 
Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2192 + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/DerivedGeneralCategory.txt b/3rd/pcre2/maint/Unicode.tables/DerivedGeneralCategory.txt new file mode 100644 index 00000000..07bf7bca --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/DerivedGeneralCategory.txt @@ -0,0 +1,4323 @@ +# DerivedGeneralCategory-16.0.0.txt +# Date: 2024-04-30, 21:48:17 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Property: General_Category + +# ================================================ + +# General_Category=Unassigned + +0378..0379 ; Cn # [2] .. +0380..0383 ; Cn # [4] .. +038B ; Cn # +038D ; Cn # +03A2 ; Cn # +0530 ; Cn # +0557..0558 ; Cn # [2] .. +058B..058C ; Cn # [2] .. +0590 ; Cn # +05C8..05CF ; Cn # [8] .. +05EB..05EE ; Cn # [4] .. +05F5..05FF ; Cn # [11] .. +070E ; Cn # +074B..074C ; Cn # [2] .. +07B2..07BF ; Cn # [14] .. +07FB..07FC ; Cn # [2] .. +082E..082F ; Cn # [2] .. +083F ; Cn # +085C..085D ; Cn # [2] .. +085F ; Cn # +086B..086F ; Cn # [5] .. +088F ; Cn # +0892..0896 ; Cn # [5] .. +0984 ; Cn # +098D..098E ; Cn # [2] .. +0991..0992 ; Cn # [2] .. +09A9 ; Cn # +09B1 ; Cn # +09B3..09B5 ; Cn # [3] .. +09BA..09BB ; Cn # [2] .. +09C5..09C6 ; Cn # [2] .. +09C9..09CA ; Cn # [2] .. +09CF..09D6 ; Cn # [8] .. +09D8..09DB ; Cn # [4] .. +09DE ; Cn # +09E4..09E5 ; Cn # [2] .. +09FF..0A00 ; Cn # [2] .. +0A04 ; Cn # +0A0B..0A0E ; Cn # [4] .. +0A11..0A12 ; Cn # [2] .. 
+0A29 ; Cn # +0A31 ; Cn # +0A34 ; Cn # +0A37 ; Cn # +0A3A..0A3B ; Cn # [2] .. +0A3D ; Cn # +0A43..0A46 ; Cn # [4] .. +0A49..0A4A ; Cn # [2] .. +0A4E..0A50 ; Cn # [3] .. +0A52..0A58 ; Cn # [7] .. +0A5D ; Cn # +0A5F..0A65 ; Cn # [7] .. +0A77..0A80 ; Cn # [10] .. +0A84 ; Cn # +0A8E ; Cn # +0A92 ; Cn # +0AA9 ; Cn # +0AB1 ; Cn # +0AB4 ; Cn # +0ABA..0ABB ; Cn # [2] .. +0AC6 ; Cn # +0ACA ; Cn # +0ACE..0ACF ; Cn # [2] .. +0AD1..0ADF ; Cn # [15] .. +0AE4..0AE5 ; Cn # [2] .. +0AF2..0AF8 ; Cn # [7] .. +0B00 ; Cn # +0B04 ; Cn # +0B0D..0B0E ; Cn # [2] .. +0B11..0B12 ; Cn # [2] .. +0B29 ; Cn # +0B31 ; Cn # +0B34 ; Cn # +0B3A..0B3B ; Cn # [2] .. +0B45..0B46 ; Cn # [2] .. +0B49..0B4A ; Cn # [2] .. +0B4E..0B54 ; Cn # [7] .. +0B58..0B5B ; Cn # [4] .. +0B5E ; Cn # +0B64..0B65 ; Cn # [2] .. +0B78..0B81 ; Cn # [10] .. +0B84 ; Cn # +0B8B..0B8D ; Cn # [3] .. +0B91 ; Cn # +0B96..0B98 ; Cn # [3] .. +0B9B ; Cn # +0B9D ; Cn # +0BA0..0BA2 ; Cn # [3] .. +0BA5..0BA7 ; Cn # [3] .. +0BAB..0BAD ; Cn # [3] .. +0BBA..0BBD ; Cn # [4] .. +0BC3..0BC5 ; Cn # [3] .. +0BC9 ; Cn # +0BCE..0BCF ; Cn # [2] .. +0BD1..0BD6 ; Cn # [6] .. +0BD8..0BE5 ; Cn # [14] .. +0BFB..0BFF ; Cn # [5] .. +0C0D ; Cn # +0C11 ; Cn # +0C29 ; Cn # +0C3A..0C3B ; Cn # [2] .. +0C45 ; Cn # +0C49 ; Cn # +0C4E..0C54 ; Cn # [7] .. +0C57 ; Cn # +0C5B..0C5C ; Cn # [2] .. +0C5E..0C5F ; Cn # [2] .. +0C64..0C65 ; Cn # [2] .. +0C70..0C76 ; Cn # [7] .. +0C8D ; Cn # +0C91 ; Cn # +0CA9 ; Cn # +0CB4 ; Cn # +0CBA..0CBB ; Cn # [2] .. +0CC5 ; Cn # +0CC9 ; Cn # +0CCE..0CD4 ; Cn # [7] .. +0CD7..0CDC ; Cn # [6] .. +0CDF ; Cn # +0CE4..0CE5 ; Cn # [2] .. +0CF0 ; Cn # +0CF4..0CFF ; Cn # [12] .. +0D0D ; Cn # +0D11 ; Cn # +0D45 ; Cn # +0D49 ; Cn # +0D50..0D53 ; Cn # [4] .. +0D64..0D65 ; Cn # [2] .. +0D80 ; Cn # +0D84 ; Cn # +0D97..0D99 ; Cn # [3] .. +0DB2 ; Cn # +0DBC ; Cn # +0DBE..0DBF ; Cn # [2] .. +0DC7..0DC9 ; Cn # [3] .. +0DCB..0DCE ; Cn # [4] .. +0DD5 ; Cn # +0DD7 ; Cn # +0DE0..0DE5 ; Cn # [6] .. +0DF0..0DF1 ; Cn # [2] .. +0DF5..0E00 ; Cn # [12] .. 
+0E3B..0E3E ; Cn # [4] .. +0E5C..0E80 ; Cn # [37] .. +0E83 ; Cn # +0E85 ; Cn # +0E8B ; Cn # +0EA4 ; Cn # +0EA6 ; Cn # +0EBE..0EBF ; Cn # [2] .. +0EC5 ; Cn # +0EC7 ; Cn # +0ECF ; Cn # +0EDA..0EDB ; Cn # [2] .. +0EE0..0EFF ; Cn # [32] .. +0F48 ; Cn # +0F6D..0F70 ; Cn # [4] .. +0F98 ; Cn # +0FBD ; Cn # +0FCD ; Cn # +0FDB..0FFF ; Cn # [37] .. +10C6 ; Cn # +10C8..10CC ; Cn # [5] .. +10CE..10CF ; Cn # [2] .. +1249 ; Cn # +124E..124F ; Cn # [2] .. +1257 ; Cn # +1259 ; Cn # +125E..125F ; Cn # [2] .. +1289 ; Cn # +128E..128F ; Cn # [2] .. +12B1 ; Cn # +12B6..12B7 ; Cn # [2] .. +12BF ; Cn # +12C1 ; Cn # +12C6..12C7 ; Cn # [2] .. +12D7 ; Cn # +1311 ; Cn # +1316..1317 ; Cn # [2] .. +135B..135C ; Cn # [2] .. +137D..137F ; Cn # [3] .. +139A..139F ; Cn # [6] .. +13F6..13F7 ; Cn # [2] .. +13FE..13FF ; Cn # [2] .. +169D..169F ; Cn # [3] .. +16F9..16FF ; Cn # [7] .. +1716..171E ; Cn # [9] .. +1737..173F ; Cn # [9] .. +1754..175F ; Cn # [12] .. +176D ; Cn # +1771 ; Cn # +1774..177F ; Cn # [12] .. +17DE..17DF ; Cn # [2] .. +17EA..17EF ; Cn # [6] .. +17FA..17FF ; Cn # [6] .. +181A..181F ; Cn # [6] .. +1879..187F ; Cn # [7] .. +18AB..18AF ; Cn # [5] .. +18F6..18FF ; Cn # [10] .. +191F ; Cn # +192C..192F ; Cn # [4] .. +193C..193F ; Cn # [4] .. +1941..1943 ; Cn # [3] .. +196E..196F ; Cn # [2] .. +1975..197F ; Cn # [11] .. +19AC..19AF ; Cn # [4] .. +19CA..19CF ; Cn # [6] .. +19DB..19DD ; Cn # [3] .. +1A1C..1A1D ; Cn # [2] .. +1A5F ; Cn # +1A7D..1A7E ; Cn # [2] .. +1A8A..1A8F ; Cn # [6] .. +1A9A..1A9F ; Cn # [6] .. +1AAE..1AAF ; Cn # [2] .. +1ACF..1AFF ; Cn # [49] .. +1B4D ; Cn # +1BF4..1BFB ; Cn # [8] .. +1C38..1C3A ; Cn # [3] .. +1C4A..1C4C ; Cn # [3] .. +1C8B..1C8F ; Cn # [5] .. +1CBB..1CBC ; Cn # [2] .. +1CC8..1CCF ; Cn # [8] .. +1CFB..1CFF ; Cn # [5] .. +1F16..1F17 ; Cn # [2] .. +1F1E..1F1F ; Cn # [2] .. +1F46..1F47 ; Cn # [2] .. +1F4E..1F4F ; Cn # [2] .. +1F58 ; Cn # +1F5A ; Cn # +1F5C ; Cn # +1F5E ; Cn # +1F7E..1F7F ; Cn # [2] .. +1FB5 ; Cn # +1FC5 ; Cn # +1FD4..1FD5 ; Cn # [2] .. 
+1FDC ; Cn # +1FF0..1FF1 ; Cn # [2] .. +1FF5 ; Cn # +1FFF ; Cn # +2065 ; Cn # +2072..2073 ; Cn # [2] .. +208F ; Cn # +209D..209F ; Cn # [3] .. +20C1..20CF ; Cn # [15] .. +20F1..20FF ; Cn # [15] .. +218C..218F ; Cn # [4] .. +242A..243F ; Cn # [22] .. +244B..245F ; Cn # [21] .. +2B74..2B75 ; Cn # [2] .. +2B96 ; Cn # +2CF4..2CF8 ; Cn # [5] .. +2D26 ; Cn # +2D28..2D2C ; Cn # [5] .. +2D2E..2D2F ; Cn # [2] .. +2D68..2D6E ; Cn # [7] .. +2D71..2D7E ; Cn # [14] .. +2D97..2D9F ; Cn # [9] .. +2DA7 ; Cn # +2DAF ; Cn # +2DB7 ; Cn # +2DBF ; Cn # +2DC7 ; Cn # +2DCF ; Cn # +2DD7 ; Cn # +2DDF ; Cn # +2E5E..2E7F ; Cn # [34] .. +2E9A ; Cn # +2EF4..2EFF ; Cn # [12] .. +2FD6..2FEF ; Cn # [26] .. +3040 ; Cn # +3097..3098 ; Cn # [2] .. +3100..3104 ; Cn # [5] .. +3130 ; Cn # +318F ; Cn # +31E6..31EE ; Cn # [9] .. +321F ; Cn # +A48D..A48F ; Cn # [3] .. +A4C7..A4CF ; Cn # [9] .. +A62C..A63F ; Cn # [20] .. +A6F8..A6FF ; Cn # [8] .. +A7CE..A7CF ; Cn # [2] .. +A7D2 ; Cn # +A7D4 ; Cn # +A7DD..A7F1 ; Cn # [21] .. +A82D..A82F ; Cn # [3] .. +A83A..A83F ; Cn # [6] .. +A878..A87F ; Cn # [8] .. +A8C6..A8CD ; Cn # [8] .. +A8DA..A8DF ; Cn # [6] .. +A954..A95E ; Cn # [11] .. +A97D..A97F ; Cn # [3] .. +A9CE ; Cn # +A9DA..A9DD ; Cn # [4] .. +A9FF ; Cn # +AA37..AA3F ; Cn # [9] .. +AA4E..AA4F ; Cn # [2] .. +AA5A..AA5B ; Cn # [2] .. +AAC3..AADA ; Cn # [24] .. +AAF7..AB00 ; Cn # [10] .. +AB07..AB08 ; Cn # [2] .. +AB0F..AB10 ; Cn # [2] .. +AB17..AB1F ; Cn # [9] .. +AB27 ; Cn # +AB2F ; Cn # +AB6C..AB6F ; Cn # [4] .. +ABEE..ABEF ; Cn # [2] .. +ABFA..ABFF ; Cn # [6] .. +D7A4..D7AF ; Cn # [12] .. +D7C7..D7CA ; Cn # [4] .. +D7FC..D7FF ; Cn # [4] .. +FA6E..FA6F ; Cn # [2] .. +FADA..FAFF ; Cn # [38] .. +FB07..FB12 ; Cn # [12] .. +FB18..FB1C ; Cn # [5] .. +FB37 ; Cn # +FB3D ; Cn # +FB3F ; Cn # +FB42 ; Cn # +FB45 ; Cn # +FBC3..FBD2 ; Cn # [16] .. +FD90..FD91 ; Cn # [2] .. +FDC8..FDCE ; Cn # [7] .. +FDD0..FDEF ; Cn # [32] .. +FE1A..FE1F ; Cn # [6] .. +FE53 ; Cn # +FE67 ; Cn # +FE6C..FE6F ; Cn # [4] .. 
+FE75 ; Cn # +FEFD..FEFE ; Cn # [2] .. +FF00 ; Cn # +FFBF..FFC1 ; Cn # [3] .. +FFC8..FFC9 ; Cn # [2] .. +FFD0..FFD1 ; Cn # [2] .. +FFD8..FFD9 ; Cn # [2] .. +FFDD..FFDF ; Cn # [3] .. +FFE7 ; Cn # +FFEF..FFF8 ; Cn # [10] .. +FFFE..FFFF ; Cn # [2] .. +1000C ; Cn # +10027 ; Cn # +1003B ; Cn # +1003E ; Cn # +1004E..1004F ; Cn # [2] .. +1005E..1007F ; Cn # [34] .. +100FB..100FF ; Cn # [5] .. +10103..10106 ; Cn # [4] .. +10134..10136 ; Cn # [3] .. +1018F ; Cn # +1019D..1019F ; Cn # [3] .. +101A1..101CF ; Cn # [47] .. +101FE..1027F ; Cn # [130] .. +1029D..1029F ; Cn # [3] .. +102D1..102DF ; Cn # [15] .. +102FC..102FF ; Cn # [4] .. +10324..1032C ; Cn # [9] .. +1034B..1034F ; Cn # [5] .. +1037B..1037F ; Cn # [5] .. +1039E ; Cn # +103C4..103C7 ; Cn # [4] .. +103D6..103FF ; Cn # [42] .. +1049E..1049F ; Cn # [2] .. +104AA..104AF ; Cn # [6] .. +104D4..104D7 ; Cn # [4] .. +104FC..104FF ; Cn # [4] .. +10528..1052F ; Cn # [8] .. +10564..1056E ; Cn # [11] .. +1057B ; Cn # +1058B ; Cn # +10593 ; Cn # +10596 ; Cn # +105A2 ; Cn # +105B2 ; Cn # +105BA ; Cn # +105BD..105BF ; Cn # [3] .. +105F4..105FF ; Cn # [12] .. +10737..1073F ; Cn # [9] .. +10756..1075F ; Cn # [10] .. +10768..1077F ; Cn # [24] .. +10786 ; Cn # +107B1 ; Cn # +107BB..107FF ; Cn # [69] .. +10806..10807 ; Cn # [2] .. +10809 ; Cn # +10836 ; Cn # +10839..1083B ; Cn # [3] .. +1083D..1083E ; Cn # [2] .. +10856 ; Cn # +1089F..108A6 ; Cn # [8] .. +108B0..108DF ; Cn # [48] .. +108F3 ; Cn # +108F6..108FA ; Cn # [5] .. +1091C..1091E ; Cn # [3] .. +1093A..1093E ; Cn # [5] .. +10940..1097F ; Cn # [64] .. +109B8..109BB ; Cn # [4] .. +109D0..109D1 ; Cn # [2] .. +10A04 ; Cn # +10A07..10A0B ; Cn # [5] .. +10A14 ; Cn # +10A18 ; Cn # +10A36..10A37 ; Cn # [2] .. +10A3B..10A3E ; Cn # [4] .. +10A49..10A4F ; Cn # [7] .. +10A59..10A5F ; Cn # [7] .. +10AA0..10ABF ; Cn # [32] .. +10AE7..10AEA ; Cn # [4] .. +10AF7..10AFF ; Cn # [9] .. +10B36..10B38 ; Cn # [3] .. +10B56..10B57 ; Cn # [2] .. +10B73..10B77 ; Cn # [5] .. +10B92..10B98 ; Cn # [7] .. 
+10B9D..10BA8 ; Cn # [12] .. +10BB0..10BFF ; Cn # [80] .. +10C49..10C7F ; Cn # [55] .. +10CB3..10CBF ; Cn # [13] .. +10CF3..10CF9 ; Cn # [7] .. +10D28..10D2F ; Cn # [8] .. +10D3A..10D3F ; Cn # [6] .. +10D66..10D68 ; Cn # [3] .. +10D86..10D8D ; Cn # [8] .. +10D90..10E5F ; Cn # [208] .. +10E7F ; Cn # +10EAA ; Cn # +10EAE..10EAF ; Cn # [2] .. +10EB2..10EC1 ; Cn # [16] .. +10EC5..10EFB ; Cn # [55] .. +10F28..10F2F ; Cn # [8] .. +10F5A..10F6F ; Cn # [22] .. +10F8A..10FAF ; Cn # [38] .. +10FCC..10FDF ; Cn # [20] .. +10FF7..10FFF ; Cn # [9] .. +1104E..11051 ; Cn # [4] .. +11076..1107E ; Cn # [9] .. +110C3..110CC ; Cn # [10] .. +110CE..110CF ; Cn # [2] .. +110E9..110EF ; Cn # [7] .. +110FA..110FF ; Cn # [6] .. +11135 ; Cn # +11148..1114F ; Cn # [8] .. +11177..1117F ; Cn # [9] .. +111E0 ; Cn # +111F5..111FF ; Cn # [11] .. +11212 ; Cn # +11242..1127F ; Cn # [62] .. +11287 ; Cn # +11289 ; Cn # +1128E ; Cn # +1129E ; Cn # +112AA..112AF ; Cn # [6] .. +112EB..112EF ; Cn # [5] .. +112FA..112FF ; Cn # [6] .. +11304 ; Cn # +1130D..1130E ; Cn # [2] .. +11311..11312 ; Cn # [2] .. +11329 ; Cn # +11331 ; Cn # +11334 ; Cn # +1133A ; Cn # +11345..11346 ; Cn # [2] .. +11349..1134A ; Cn # [2] .. +1134E..1134F ; Cn # [2] .. +11351..11356 ; Cn # [6] .. +11358..1135C ; Cn # [5] .. +11364..11365 ; Cn # [2] .. +1136D..1136F ; Cn # [3] .. +11375..1137F ; Cn # [11] .. +1138A ; Cn # +1138C..1138D ; Cn # [2] .. +1138F ; Cn # +113B6 ; Cn # +113C1 ; Cn # +113C3..113C4 ; Cn # [2] .. +113C6 ; Cn # +113CB ; Cn # +113D6 ; Cn # +113D9..113E0 ; Cn # [8] .. +113E3..113FF ; Cn # [29] .. +1145C ; Cn # +11462..1147F ; Cn # [30] .. +114C8..114CF ; Cn # [8] .. +114DA..1157F ; Cn # [166] .. +115B6..115B7 ; Cn # [2] .. +115DE..115FF ; Cn # [34] .. +11645..1164F ; Cn # [11] .. +1165A..1165F ; Cn # [6] .. +1166D..1167F ; Cn # [19] .. +116BA..116BF ; Cn # [6] .. +116CA..116CF ; Cn # [6] .. +116E4..116FF ; Cn # [28] .. +1171B..1171C ; Cn # [2] .. +1172C..1172F ; Cn # [4] .. +11747..117FF ; Cn # [185] .. 
+1183C..1189F ; Cn # [100] .. +118F3..118FE ; Cn # [12] .. +11907..11908 ; Cn # [2] .. +1190A..1190B ; Cn # [2] .. +11914 ; Cn # +11917 ; Cn # +11936 ; Cn # +11939..1193A ; Cn # [2] .. +11947..1194F ; Cn # [9] .. +1195A..1199F ; Cn # [70] .. +119A8..119A9 ; Cn # [2] .. +119D8..119D9 ; Cn # [2] .. +119E5..119FF ; Cn # [27] .. +11A48..11A4F ; Cn # [8] .. +11AA3..11AAF ; Cn # [13] .. +11AF9..11AFF ; Cn # [7] .. +11B0A..11BBF ; Cn # [182] .. +11BE2..11BEF ; Cn # [14] .. +11BFA..11BFF ; Cn # [6] .. +11C09 ; Cn # +11C37 ; Cn # +11C46..11C4F ; Cn # [10] .. +11C6D..11C6F ; Cn # [3] .. +11C90..11C91 ; Cn # [2] .. +11CA8 ; Cn # +11CB7..11CFF ; Cn # [73] .. +11D07 ; Cn # +11D0A ; Cn # +11D37..11D39 ; Cn # [3] .. +11D3B ; Cn # +11D3E ; Cn # +11D48..11D4F ; Cn # [8] .. +11D5A..11D5F ; Cn # [6] .. +11D66 ; Cn # +11D69 ; Cn # +11D8F ; Cn # +11D92 ; Cn # +11D99..11D9F ; Cn # [7] .. +11DAA..11EDF ; Cn # [310] .. +11EF9..11EFF ; Cn # [7] .. +11F11 ; Cn # +11F3B..11F3D ; Cn # [3] .. +11F5B..11FAF ; Cn # [85] .. +11FB1..11FBF ; Cn # [15] .. +11FF2..11FFE ; Cn # [13] .. +1239A..123FF ; Cn # [102] .. +1246F ; Cn # +12475..1247F ; Cn # [11] .. +12544..12F8F ; Cn # [2636] .. +12FF3..12FFF ; Cn # [13] .. +13456..1345F ; Cn # [10] .. +143FB..143FF ; Cn # [5] .. +14647..160FF ; Cn # [6841] .. +1613A..167FF ; Cn # [1734] .. +16A39..16A3F ; Cn # [7] .. +16A5F ; Cn # +16A6A..16A6D ; Cn # [4] .. +16ABF ; Cn # +16ACA..16ACF ; Cn # [6] .. +16AEE..16AEF ; Cn # [2] .. +16AF6..16AFF ; Cn # [10] .. +16B46..16B4F ; Cn # [10] .. +16B5A ; Cn # +16B62 ; Cn # +16B78..16B7C ; Cn # [5] .. +16B90..16D3F ; Cn # [432] .. +16D7A..16E3F ; Cn # [198] .. +16E9B..16EFF ; Cn # [101] .. +16F4B..16F4E ; Cn # [4] .. +16F88..16F8E ; Cn # [7] .. +16FA0..16FDF ; Cn # [64] .. +16FE5..16FEF ; Cn # [11] .. +16FF2..16FFF ; Cn # [14] .. +187F8..187FF ; Cn # [8] .. +18CD6..18CFE ; Cn # [41] .. +18D09..1AFEF ; Cn # [8935] .. +1AFF4 ; Cn # +1AFFC ; Cn # +1AFFF ; Cn # +1B123..1B131 ; Cn # [15] .. +1B133..1B14F ; Cn # [29] .. 
+1B153..1B154 ; Cn # [2] .. +1B156..1B163 ; Cn # [14] .. +1B168..1B16F ; Cn # [8] .. +1B2FC..1BBFF ; Cn # [2308] .. +1BC6B..1BC6F ; Cn # [5] .. +1BC7D..1BC7F ; Cn # [3] .. +1BC89..1BC8F ; Cn # [7] .. +1BC9A..1BC9B ; Cn # [2] .. +1BCA4..1CBFF ; Cn # [3932] .. +1CCFA..1CCFF ; Cn # [6] .. +1CEB4..1CEFF ; Cn # [76] .. +1CF2E..1CF2F ; Cn # [2] .. +1CF47..1CF4F ; Cn # [9] .. +1CFC4..1CFFF ; Cn # [60] .. +1D0F6..1D0FF ; Cn # [10] .. +1D127..1D128 ; Cn # [2] .. +1D1EB..1D1FF ; Cn # [21] .. +1D246..1D2BF ; Cn # [122] .. +1D2D4..1D2DF ; Cn # [12] .. +1D2F4..1D2FF ; Cn # [12] .. +1D357..1D35F ; Cn # [9] .. +1D379..1D3FF ; Cn # [135] .. +1D455 ; Cn # +1D49D ; Cn # +1D4A0..1D4A1 ; Cn # [2] .. +1D4A3..1D4A4 ; Cn # [2] .. +1D4A7..1D4A8 ; Cn # [2] .. +1D4AD ; Cn # +1D4BA ; Cn # +1D4BC ; Cn # +1D4C4 ; Cn # +1D506 ; Cn # +1D50B..1D50C ; Cn # [2] .. +1D515 ; Cn # +1D51D ; Cn # +1D53A ; Cn # +1D53F ; Cn # +1D545 ; Cn # +1D547..1D549 ; Cn # [3] .. +1D551 ; Cn # +1D6A6..1D6A7 ; Cn # [2] .. +1D7CC..1D7CD ; Cn # [2] .. +1DA8C..1DA9A ; Cn # [15] .. +1DAA0 ; Cn # +1DAB0..1DEFF ; Cn # [1104] .. +1DF1F..1DF24 ; Cn # [6] .. +1DF2B..1DFFF ; Cn # [213] .. +1E007 ; Cn # +1E019..1E01A ; Cn # [2] .. +1E022 ; Cn # +1E025 ; Cn # +1E02B..1E02F ; Cn # [5] .. +1E06E..1E08E ; Cn # [33] .. +1E090..1E0FF ; Cn # [112] .. +1E12D..1E12F ; Cn # [3] .. +1E13E..1E13F ; Cn # [2] .. +1E14A..1E14D ; Cn # [4] .. +1E150..1E28F ; Cn # [320] .. +1E2AF..1E2BF ; Cn # [17] .. +1E2FA..1E2FE ; Cn # [5] .. +1E300..1E4CF ; Cn # [464] .. +1E4FA..1E5CF ; Cn # [214] .. +1E5FB..1E5FE ; Cn # [4] .. +1E600..1E7DF ; Cn # [480] .. +1E7E7 ; Cn # +1E7EC ; Cn # +1E7EF ; Cn # +1E7FF ; Cn # +1E8C5..1E8C6 ; Cn # [2] .. +1E8D7..1E8FF ; Cn # [41] .. +1E94C..1E94F ; Cn # [4] .. +1E95A..1E95D ; Cn # [4] .. +1E960..1EC70 ; Cn # [785] .. +1ECB5..1ED00 ; Cn # [76] .. +1ED3E..1EDFF ; Cn # [194] .. +1EE04 ; Cn # +1EE20 ; Cn # +1EE23 ; Cn # +1EE25..1EE26 ; Cn # [2] .. 
+1EE28 ; Cn # +1EE33 ; Cn # +1EE38 ; Cn # +1EE3A ; Cn # +1EE3C..1EE41 ; Cn # [6] .. +1EE43..1EE46 ; Cn # [4] .. +1EE48 ; Cn # +1EE4A ; Cn # +1EE4C ; Cn # +1EE50 ; Cn # +1EE53 ; Cn # +1EE55..1EE56 ; Cn # [2] .. +1EE58 ; Cn # +1EE5A ; Cn # +1EE5C ; Cn # +1EE5E ; Cn # +1EE60 ; Cn # +1EE63 ; Cn # +1EE65..1EE66 ; Cn # [2] .. +1EE6B ; Cn # +1EE73 ; Cn # +1EE78 ; Cn # +1EE7D ; Cn # +1EE7F ; Cn # +1EE8A ; Cn # +1EE9C..1EEA0 ; Cn # [5] .. +1EEA4 ; Cn # +1EEAA ; Cn # +1EEBC..1EEEF ; Cn # [52] .. +1EEF2..1EFFF ; Cn # [270] .. +1F02C..1F02F ; Cn # [4] .. +1F094..1F09F ; Cn # [12] .. +1F0AF..1F0B0 ; Cn # [2] .. +1F0C0 ; Cn # +1F0D0 ; Cn # +1F0F6..1F0FF ; Cn # [10] .. +1F1AE..1F1E5 ; Cn # [56] .. +1F203..1F20F ; Cn # [13] .. +1F23C..1F23F ; Cn # [4] .. +1F249..1F24F ; Cn # [7] .. +1F252..1F25F ; Cn # [14] .. +1F266..1F2FF ; Cn # [154] .. +1F6D8..1F6DB ; Cn # [4] .. +1F6ED..1F6EF ; Cn # [3] .. +1F6FD..1F6FF ; Cn # [3] .. +1F777..1F77A ; Cn # [4] .. +1F7DA..1F7DF ; Cn # [6] .. +1F7EC..1F7EF ; Cn # [4] .. +1F7F1..1F7FF ; Cn # [15] .. +1F80C..1F80F ; Cn # [4] .. +1F848..1F84F ; Cn # [8] .. +1F85A..1F85F ; Cn # [6] .. +1F888..1F88F ; Cn # [8] .. +1F8AE..1F8AF ; Cn # [2] .. +1F8BC..1F8BF ; Cn # [4] .. +1F8C2..1F8FF ; Cn # [62] .. +1FA54..1FA5F ; Cn # [12] .. +1FA6E..1FA6F ; Cn # [2] .. +1FA7D..1FA7F ; Cn # [3] .. +1FA8A..1FA8E ; Cn # [5] .. +1FAC7..1FACD ; Cn # [7] .. +1FADD..1FADE ; Cn # [2] .. +1FAEA..1FAEF ; Cn # [6] .. +1FAF9..1FAFF ; Cn # [7] .. +1FB93 ; Cn # +1FBFA..1FFFF ; Cn # [1030] .. +2A6E0..2A6FF ; Cn # [32] .. +2B73A..2B73F ; Cn # [6] .. +2B81E..2B81F ; Cn # [2] .. +2CEA2..2CEAF ; Cn # [14] .. +2EBE1..2EBEF ; Cn # [15] .. +2EE5E..2F7FF ; Cn # [2466] .. +2FA1E..2FFFF ; Cn # [1506] .. +3134B..3134F ; Cn # [5] .. +323B0..E0000 ; Cn # [711761] .. +E0002..E001F ; Cn # [30] .. +E0080..E00FF ; Cn # [128] .. +E01F0..EFFFF ; Cn # [65040] .. +FFFFE..FFFFF ; Cn # [2] .. +10FFFE..10FFFF; Cn # [2] .. 
+ +# Total code points: 819533 + +# ================================================ + +# General_Category=Uppercase_Letter + +0041..005A ; Lu # [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00C0..00D6 ; Lu # [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DE ; Lu # [7] LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN +0100 ; Lu # LATIN CAPITAL LETTER A WITH MACRON +0102 ; Lu # LATIN CAPITAL LETTER A WITH BREVE +0104 ; Lu # LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Lu # LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Lu # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Lu # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Lu # LATIN CAPITAL LETTER C WITH CARON +010E ; Lu # LATIN CAPITAL LETTER D WITH CARON +0110 ; Lu # LATIN CAPITAL LETTER D WITH STROKE +0112 ; Lu # LATIN CAPITAL LETTER E WITH MACRON +0114 ; Lu # LATIN CAPITAL LETTER E WITH BREVE +0116 ; Lu # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Lu # LATIN CAPITAL LETTER E WITH OGONEK +011A ; Lu # LATIN CAPITAL LETTER E WITH CARON +011C ; Lu # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Lu # LATIN CAPITAL LETTER G WITH BREVE +0120 ; Lu # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Lu # LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Lu # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Lu # LATIN CAPITAL LETTER H WITH STROKE +0128 ; Lu # LATIN CAPITAL LETTER I WITH TILDE +012A ; Lu # LATIN CAPITAL LETTER I WITH MACRON +012C ; Lu # LATIN CAPITAL LETTER I WITH BREVE +012E ; Lu # LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Lu # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Lu # LATIN CAPITAL LIGATURE IJ +0134 ; Lu # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Lu # LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Lu # LATIN CAPITAL LETTER L WITH ACUTE +013B ; Lu # LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Lu # LATIN CAPITAL LETTER L WITH CARON +013F ; Lu # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Lu # LATIN CAPITAL LETTER L WITH STROKE +0143 ; Lu # LATIN 
CAPITAL LETTER N WITH ACUTE +0145 ; Lu # LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Lu # LATIN CAPITAL LETTER N WITH CARON +014A ; Lu # LATIN CAPITAL LETTER ENG +014C ; Lu # LATIN CAPITAL LETTER O WITH MACRON +014E ; Lu # LATIN CAPITAL LETTER O WITH BREVE +0150 ; Lu # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Lu # LATIN CAPITAL LIGATURE OE +0154 ; Lu # LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Lu # LATIN CAPITAL LETTER R WITH CEDILLA +0158 ; Lu # LATIN CAPITAL LETTER R WITH CARON +015A ; Lu # LATIN CAPITAL LETTER S WITH ACUTE +015C ; Lu # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Lu # LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Lu # LATIN CAPITAL LETTER S WITH CARON +0162 ; Lu # LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Lu # LATIN CAPITAL LETTER T WITH CARON +0166 ; Lu # LATIN CAPITAL LETTER T WITH STROKE +0168 ; Lu # LATIN CAPITAL LETTER U WITH TILDE +016A ; Lu # LATIN CAPITAL LETTER U WITH MACRON +016C ; Lu # LATIN CAPITAL LETTER U WITH BREVE +016E ; Lu # LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Lu # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Lu # LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Lu # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Lu # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Lu # [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Lu # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Lu # LATIN CAPITAL LETTER Z WITH CARON +0181..0182 ; Lu # [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Lu # LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Lu # [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Lu # [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Lu # [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Lu # [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Lu # [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK 
+019C..019D ; Lu # [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; Lu # [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN +01A2 ; Lu # LATIN CAPITAL LETTER OI +01A4 ; Lu # LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Lu # [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Lu # LATIN CAPITAL LETTER ESH +01AC ; Lu # LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Lu # [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Lu # [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Lu # LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Lu # [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Lu # LATIN CAPITAL LETTER TONE FIVE +01C4 ; Lu # LATIN CAPITAL LETTER DZ WITH CARON +01C7 ; Lu # LATIN CAPITAL LETTER LJ +01CA ; Lu # LATIN CAPITAL LETTER NJ +01CD ; Lu # LATIN CAPITAL LETTER A WITH CARON +01CF ; Lu # LATIN CAPITAL LETTER I WITH CARON +01D1 ; Lu # LATIN CAPITAL LETTER O WITH CARON +01D3 ; Lu # LATIN CAPITAL LETTER U WITH CARON +01D5 ; Lu # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Lu # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Lu # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Lu # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Lu # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0 ; Lu # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Lu # LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Lu # LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Lu # LATIN CAPITAL LETTER G WITH CARON +01E8 ; Lu # LATIN CAPITAL LETTER K WITH CARON +01EA ; Lu # LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Lu # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Lu # LATIN CAPITAL LETTER EZH WITH CARON +01F1 ; Lu # LATIN CAPITAL LETTER DZ +01F4 ; Lu # LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Lu # [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Lu # 
LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Lu # LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Lu # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Lu # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Lu # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Lu # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Lu # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Lu # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Lu # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Lu # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Lu # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Lu # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Lu # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Lu # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216 ; Lu # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Lu # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Lu # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Lu # LATIN CAPITAL LETTER YOGH +021E ; Lu # LATIN CAPITAL LETTER H WITH CARON +0220 ; Lu # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Lu # LATIN CAPITAL LETTER OU +0224 ; Lu # LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Lu # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Lu # LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Lu # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Lu # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Lu # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Lu # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Lu # LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; Lu # [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Lu # [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Lu # LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Lu # [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Lu # LATIN CAPITAL LETTER J WITH STROKE +024A ; Lu # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Lu # LATIN CAPITAL 
LETTER R WITH STROKE +024E ; Lu # LATIN CAPITAL LETTER Y WITH STROKE +0370 ; Lu # GREEK CAPITAL LETTER HETA +0372 ; Lu # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376 ; Lu # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Lu # GREEK CAPITAL LETTER YOT +0386 ; Lu # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Lu # [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Lu # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Lu # [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Lu # [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Lu # [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03CF ; Lu # GREEK CAPITAL KAI SYMBOL +03D2..03D4 ; Lu # [3] GREEK UPSILON WITH HOOK SYMBOL..GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL +03D8 ; Lu # GREEK LETTER ARCHAIC KOPPA +03DA ; Lu # GREEK LETTER STIGMA +03DC ; Lu # GREEK LETTER DIGAMMA +03DE ; Lu # GREEK LETTER KOPPA +03E0 ; Lu # GREEK LETTER SAMPI +03E2 ; Lu # COPTIC CAPITAL LETTER SHEI +03E4 ; Lu # COPTIC CAPITAL LETTER FEI +03E6 ; Lu # COPTIC CAPITAL LETTER KHEI +03E8 ; Lu # COPTIC CAPITAL LETTER HORI +03EA ; Lu # COPTIC CAPITAL LETTER GANGIA +03EC ; Lu # COPTIC CAPITAL LETTER SHIMA +03EE ; Lu # COPTIC CAPITAL LETTER DEI +03F4 ; Lu # GREEK CAPITAL THETA SYMBOL +03F7 ; Lu # GREEK CAPITAL LETTER SHO +03F9..03FA ; Lu # [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Lu # [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Lu # CYRILLIC CAPITAL LETTER OMEGA +0462 ; Lu # CYRILLIC CAPITAL LETTER YAT +0464 ; Lu # CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Lu # CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Lu # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Lu # CYRILLIC CAPITAL LETTER BIG YUS +046C ; Lu # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Lu # CYRILLIC CAPITAL LETTER KSI +0470 ; Lu # CYRILLIC CAPITAL LETTER PSI +0472 ; 
Lu # CYRILLIC CAPITAL LETTER FITA +0474 ; Lu # CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Lu # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Lu # CYRILLIC CAPITAL LETTER UK +047A ; Lu # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Lu # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Lu # CYRILLIC CAPITAL LETTER OT +0480 ; Lu # CYRILLIC CAPITAL LETTER KOPPA +048A ; Lu # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Lu # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Lu # CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Lu # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Lu # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Lu # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496 ; Lu # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Lu # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Lu # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Lu # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E ; Lu # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0 ; Lu # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Lu # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Lu # CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Lu # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Lu # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Lu # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Lu # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Lu # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Lu # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Lu # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Lu # CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Lu # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Lu # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Lu # CYRILLIC CAPITAL LETTER SHHA +04BC ; Lu # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Lu # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Lu # [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Lu # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Lu # CYRILLIC CAPITAL 
LETTER EL WITH TAIL +04C7 ; Lu # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; Lu # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Lu # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Lu # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Lu # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Lu # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Lu # CYRILLIC CAPITAL LIGATURE A IE +04D6 ; Lu # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Lu # CYRILLIC CAPITAL LETTER SCHWA +04DA ; Lu # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Lu # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Lu # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Lu # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Lu # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Lu # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Lu # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Lu # CYRILLIC CAPITAL LETTER BARRED O +04EA ; Lu # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Lu # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Lu # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Lu # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Lu # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Lu # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Lu # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Lu # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Lu # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Lu # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE ; Lu # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Lu # CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Lu # CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Lu # CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Lu # CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Lu # CYRILLIC CAPITAL LETTER KOMI LJE +050A ; Lu # CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Lu # CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Lu # CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Lu # CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Lu # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Lu # CYRILLIC CAPITAL 
LETTER LHA +0516 ; Lu # CYRILLIC CAPITAL LETTER RHA +0518 ; Lu # CYRILLIC CAPITAL LETTER YAE +051A ; Lu # CYRILLIC CAPITAL LETTER QA +051C ; Lu # CYRILLIC CAPITAL LETTER WE +051E ; Lu # CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Lu # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522 ; Lu # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Lu # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Lu # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Lu # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Lu # CYRILLIC CAPITAL LETTER DZZHE +052C ; Lu # CYRILLIC CAPITAL LETTER DCHE +052E ; Lu # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Lu # [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +10A0..10C5 ; Lu # [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Lu # GEORGIAN CAPITAL LETTER YN +10CD ; Lu # GEORGIAN CAPITAL LETTER AEN +13A0..13F5 ; Lu # [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Lu # CYRILLIC CAPITAL LETTER TJE +1C90..1CBA ; Lu # [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Lu # [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Lu # LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Lu # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Lu # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Lu # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Lu # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Lu # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Lu # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Lu # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Lu # LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Lu # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Lu # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Lu # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Lu # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Lu # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Lu # LATIN CAPITAL LETTER E WITH CEDILLA AND 
BREVE +1E1E ; Lu # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Lu # LATIN CAPITAL LETTER G WITH MACRON +1E22 ; Lu # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Lu # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Lu # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Lu # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Lu # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Lu # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Lu # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; Lu # LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Lu # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Lu # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Lu # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Lu # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Lu # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Lu # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Lu # LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Lu # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Lu # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Lu # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Lu # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Lu # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Lu # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Lu # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Lu # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Lu # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Lu # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Lu # LATIN CAPITAL LETTER P WITH ACUTE +1E56 ; Lu # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Lu # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Lu # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Lu # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Lu # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Lu # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Lu # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64 ; Lu # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Lu # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Lu # LATIN 
CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Lu # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Lu # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Lu # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Lu # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Lu # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Lu # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Lu # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Lu # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Lu # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Lu # LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Lu # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Lu # LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Lu # LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Lu # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Lu # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Lu # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A ; Lu # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Lu # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Lu # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Lu # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Lu # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Lu # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9E ; Lu # LATIN CAPITAL LETTER SHARP S +1EA0 ; Lu # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Lu # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4 ; Lu # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Lu # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Lu # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Lu # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Lu # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Lu # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Lu # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Lu # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Lu # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Lu # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Lu # LATIN CAPITAL LETTER E WITH 
DOT BELOW +1EBA ; Lu # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Lu # LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Lu # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; Lu # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Lu # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Lu # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Lu # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Lu # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Lu # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Lu # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Lu # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; Lu # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Lu # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Lu # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Lu # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Lu # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Lu # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Lu # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE ; Lu # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Lu # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Lu # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Lu # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Lu # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Lu # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Lu # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Lu # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Lu # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; Lu # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Lu # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Lu # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Lu # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Lu # LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Lu # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Lu # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Lu # LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Lu # [8] 
GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Lu # [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Lu # [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Lu # [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Lu # [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Lu # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Lu # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Lu # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Lu # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68..1F6F ; Lu # [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1FB8..1FBB ; Lu # [4] GREEK CAPITAL LETTER ALPHA WITH VRACHY..GREEK CAPITAL LETTER ALPHA WITH OXIA +1FC8..1FCB ; Lu # [4] GREEK CAPITAL LETTER EPSILON WITH VARIA..GREEK CAPITAL LETTER ETA WITH OXIA +1FD8..1FDB ; Lu # [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE8..1FEC ; Lu # [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF8..1FFB ; Lu # [4] GREEK CAPITAL LETTER OMICRON WITH VARIA..GREEK CAPITAL LETTER OMEGA WITH OXIA +2102 ; Lu # DOUBLE-STRUCK CAPITAL C +2107 ; Lu # EULER CONSTANT +210B..210D ; Lu # [3] SCRIPT CAPITAL H..DOUBLE-STRUCK CAPITAL H +2110..2112 ; Lu # [3] SCRIPT CAPITAL I..SCRIPT CAPITAL L +2115 ; Lu # DOUBLE-STRUCK CAPITAL N +2119..211D ; Lu # [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Lu # DOUBLE-STRUCK CAPITAL Z +2126 ; Lu # OHM SIGN +2128 ; Lu # BLACK-LETTER CAPITAL Z +212A..212D ; Lu # [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +2130..2133 ; Lu # [4] SCRIPT CAPITAL E..SCRIPT CAPITAL M +213E..213F ; Lu # [2] DOUBLE-STRUCK CAPITAL GAMMA..DOUBLE-STRUCK CAPITAL PI +2145 
; Lu # DOUBLE-STRUCK ITALIC CAPITAL D +2183 ; Lu # ROMAN NUMERAL REVERSED ONE HUNDRED +2C00..2C2F ; Lu # [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Lu # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Lu # [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Lu # LATIN CAPITAL LETTER H WITH DESCENDER +2C69 ; Lu # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B ; Lu # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Lu # [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Lu # LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Lu # LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Lu # [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Lu # COPTIC CAPITAL LETTER VIDA +2C84 ; Lu # COPTIC CAPITAL LETTER GAMMA +2C86 ; Lu # COPTIC CAPITAL LETTER DALDA +2C88 ; Lu # COPTIC CAPITAL LETTER EIE +2C8A ; Lu # COPTIC CAPITAL LETTER SOU +2C8C ; Lu # COPTIC CAPITAL LETTER ZATA +2C8E ; Lu # COPTIC CAPITAL LETTER HATE +2C90 ; Lu # COPTIC CAPITAL LETTER THETHE +2C92 ; Lu # COPTIC CAPITAL LETTER IAUDA +2C94 ; Lu # COPTIC CAPITAL LETTER KAPA +2C96 ; Lu # COPTIC CAPITAL LETTER LAULA +2C98 ; Lu # COPTIC CAPITAL LETTER MI +2C9A ; Lu # COPTIC CAPITAL LETTER NI +2C9C ; Lu # COPTIC CAPITAL LETTER KSI +2C9E ; Lu # COPTIC CAPITAL LETTER O +2CA0 ; Lu # COPTIC CAPITAL LETTER PI +2CA2 ; Lu # COPTIC CAPITAL LETTER RO +2CA4 ; Lu # COPTIC CAPITAL LETTER SIMA +2CA6 ; Lu # COPTIC CAPITAL LETTER TAU +2CA8 ; Lu # COPTIC CAPITAL LETTER UA +2CAA ; Lu # COPTIC CAPITAL LETTER FI +2CAC ; Lu # COPTIC CAPITAL LETTER KHI +2CAE ; Lu # COPTIC CAPITAL LETTER PSI +2CB0 ; Lu # COPTIC CAPITAL LETTER OOU +2CB2 ; Lu # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Lu # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Lu # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE 
; Lu # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Lu # COPTIC CAPITAL LETTER SAMPI +2CC2 ; Lu # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Lu # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Lu # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Lu # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Lu # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Lu # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Lu # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Lu # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Lu # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Lu # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Lu # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Lu # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Lu # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Lu # CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Lu # CYRILLIC CAPITAL LETTER DZELO +A644 ; Lu # CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Lu # CYRILLIC CAPITAL LETTER IOTA +A648 ; Lu # CYRILLIC CAPITAL LETTER DJERV +A64A ; Lu # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Lu # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Lu # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Lu # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Lu # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Lu # CYRILLIC CAPITAL LETTER REVERSED YU +A656 ; Lu # CYRILLIC CAPITAL LETTER IOTIFIED A +A658 ; Lu # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Lu # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Lu # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Lu # CYRILLIC CAPITAL LETTER YN +A660 ; Lu # CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Lu # CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Lu # CYRILLIC 
CAPITAL LETTER SOFT EL +A666 ; Lu # CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Lu # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Lu # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Lu # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Lu # CYRILLIC CAPITAL LETTER DWE +A682 ; Lu # CYRILLIC CAPITAL LETTER DZWE +A684 ; Lu # CYRILLIC CAPITAL LETTER ZHWE +A686 ; Lu # CYRILLIC CAPITAL LETTER CCHE +A688 ; Lu # CYRILLIC CAPITAL LETTER DZZE +A68A ; Lu # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Lu # CYRILLIC CAPITAL LETTER TWE +A68E ; Lu # CYRILLIC CAPITAL LETTER TSWE +A690 ; Lu # CYRILLIC CAPITAL LETTER TSSE +A692 ; Lu # CYRILLIC CAPITAL LETTER TCHE +A694 ; Lu # CYRILLIC CAPITAL LETTER HWE +A696 ; Lu # CYRILLIC CAPITAL LETTER SHWE +A698 ; Lu # CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Lu # CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Lu # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Lu # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726 ; Lu # LATIN CAPITAL LETTER HENG +A728 ; Lu # LATIN CAPITAL LETTER TZ +A72A ; Lu # LATIN CAPITAL LETTER TRESILLO +A72C ; Lu # LATIN CAPITAL LETTER CUATRILLO +A72E ; Lu # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Lu # LATIN CAPITAL LETTER AA +A734 ; Lu # LATIN CAPITAL LETTER AO +A736 ; Lu # LATIN CAPITAL LETTER AU +A738 ; Lu # LATIN CAPITAL LETTER AV +A73A ; Lu # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Lu # LATIN CAPITAL LETTER AY +A73E ; Lu # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Lu # LATIN CAPITAL LETTER K WITH STROKE +A742 ; Lu # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Lu # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Lu # LATIN CAPITAL LETTER BROKEN L +A748 ; Lu # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Lu # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Lu # LATIN CAPITAL LETTER O WITH LOOP +A74E ; Lu # LATIN CAPITAL LETTER OO +A750 ; Lu # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Lu # LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Lu # LATIN 
CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Lu # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Lu # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Lu # LATIN CAPITAL LETTER R ROTUNDA +A75C ; Lu # LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Lu # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Lu # LATIN CAPITAL LETTER VY +A762 ; Lu # LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Lu # LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Lu # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Lu # LATIN CAPITAL LETTER VEND +A76A ; Lu # LATIN CAPITAL LETTER ET +A76C ; Lu # LATIN CAPITAL LETTER IS +A76E ; Lu # LATIN CAPITAL LETTER CON +A779 ; Lu # LATIN CAPITAL LETTER INSULAR D +A77B ; Lu # LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Lu # [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Lu # LATIN CAPITAL LETTER TURNED L +A782 ; Lu # LATIN CAPITAL LETTER INSULAR R +A784 ; Lu # LATIN CAPITAL LETTER INSULAR S +A786 ; Lu # LATIN CAPITAL LETTER INSULAR T +A78B ; Lu # LATIN CAPITAL LETTER SALTILLO +A78D ; Lu # LATIN CAPITAL LETTER TURNED H +A790 ; Lu # LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Lu # LATIN CAPITAL LETTER C WITH BAR +A796 ; Lu # LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Lu # LATIN CAPITAL LETTER F WITH STROKE +A79A ; Lu # LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Lu # LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Lu # LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Lu # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Lu # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Lu # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; Lu # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Lu # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Lu # [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Lu # [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Lu # LATIN CAPITAL LETTER OMEGA +A7B8 ; Lu # LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Lu # LATIN 
CAPITAL LETTER GLOTTAL A +A7BC ; Lu # LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Lu # LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Lu # LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7CB..A7CC ; Lu # [2] LATIN CAPITAL LETTER RAMS HORN..LATIN CAPITAL LETTER S WITH DIAGONAL STROKE +A7D0 ; Lu # LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Lu # LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Lu # LATIN CAPITAL LETTER SIGMOID S +A7DA ; Lu # LATIN CAPITAL LETTER LAMBDA +A7DC ; Lu # LATIN CAPITAL LETTER LAMBDA WITH STROKE +A7F5 ; Lu # LATIN CAPITAL LETTER REVERSED HALF H +FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Lu # [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Lu # [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Lu # [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Lu # [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Lu # [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Lu # [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Lu # [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +118A0..118BF ; Lu # [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Lu # [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z +1D434..1D44D ; Lu # [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z +1D468..1D481 ; Lu # [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z +1D49C ; Lu # MATHEMATICAL SCRIPT CAPITAL A 
+1D49E..1D49F ; Lu # [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Lu # MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Lu # [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Lu # [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B5 ; Lu # [8] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT CAPITAL Z +1D4D0..1D4E9 ; Lu # [26] MATHEMATICAL BOLD SCRIPT CAPITAL A..MATHEMATICAL BOLD SCRIPT CAPITAL Z +1D504..1D505 ; Lu # [2] MATHEMATICAL FRAKTUR CAPITAL A..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Lu # [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Lu # [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Lu # [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D538..1D539 ; Lu # [2] MATHEMATICAL DOUBLE-STRUCK CAPITAL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Lu # [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Lu # [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Lu # MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Lu # [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D56C..1D585 ; Lu # [26] MATHEMATICAL BOLD FRAKTUR CAPITAL A..MATHEMATICAL BOLD FRAKTUR CAPITAL Z +1D5A0..1D5B9 ; Lu # [26] MATHEMATICAL SANS-SERIF CAPITAL A..MATHEMATICAL SANS-SERIF CAPITAL Z +1D5D4..1D5ED ; Lu # [26] MATHEMATICAL SANS-SERIF BOLD CAPITAL A..MATHEMATICAL SANS-SERIF BOLD CAPITAL Z +1D608..1D621 ; Lu # [26] MATHEMATICAL SANS-SERIF ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF ITALIC CAPITAL Z +1D63C..1D655 ; Lu # [26] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Z +1D670..1D689 ; Lu # [26] MATHEMATICAL MONOSPACE CAPITAL A..MATHEMATICAL MONOSPACE CAPITAL Z +1D6A8..1D6C0 ; Lu # [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA 
+1D6E2..1D6FA ; Lu # [25] MATHEMATICAL ITALIC CAPITAL ALPHA..MATHEMATICAL ITALIC CAPITAL OMEGA +1D71C..1D734 ; Lu # [25] MATHEMATICAL BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D756..1D76E ; Lu # [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D790..1D7A8 ; Lu # [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA +1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA + +# Total code points: 1858 + +# ================================================ + +# General_Category=Lowercase_Letter + +0061..007A ; Ll # [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Ll # MICRO SIGN +00DF..00F6 ; Ll # [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Ll # [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Ll # LATIN SMALL LETTER A WITH MACRON +0103 ; Ll # LATIN SMALL LETTER A WITH BREVE +0105 ; Ll # LATIN SMALL LETTER A WITH OGONEK +0107 ; Ll # LATIN SMALL LETTER C WITH ACUTE +0109 ; Ll # LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Ll # LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Ll # LATIN SMALL LETTER C WITH CARON +010F ; Ll # LATIN SMALL LETTER D WITH CARON +0111 ; Ll # LATIN SMALL LETTER D WITH STROKE +0113 ; Ll # LATIN SMALL LETTER E WITH MACRON +0115 ; Ll # LATIN SMALL LETTER E WITH BREVE +0117 ; Ll # LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Ll # LATIN SMALL LETTER E WITH OGONEK +011B ; Ll # LATIN SMALL LETTER E WITH CARON +011D ; Ll # LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Ll # LATIN SMALL LETTER G WITH BREVE +0121 ; Ll # LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Ll # LATIN SMALL LETTER G WITH CEDILLA +0125 ; Ll # LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Ll # LATIN SMALL LETTER H WITH STROKE +0129 ; Ll # LATIN SMALL LETTER I WITH TILDE +012B ; Ll # LATIN SMALL LETTER I WITH MACRON +012D ; Ll 
# LATIN SMALL LETTER I WITH BREVE +012F ; Ll # LATIN SMALL LETTER I WITH OGONEK +0131 ; Ll # LATIN SMALL LETTER DOTLESS I +0133 ; Ll # LATIN SMALL LIGATURE IJ +0135 ; Ll # LATIN SMALL LETTER J WITH CIRCUMFLEX +0137..0138 ; Ll # [2] LATIN SMALL LETTER K WITH CEDILLA..LATIN SMALL LETTER KRA +013A ; Ll # LATIN SMALL LETTER L WITH ACUTE +013C ; Ll # LATIN SMALL LETTER L WITH CEDILLA +013E ; Ll # LATIN SMALL LETTER L WITH CARON +0140 ; Ll # LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Ll # LATIN SMALL LETTER L WITH STROKE +0144 ; Ll # LATIN SMALL LETTER N WITH ACUTE +0146 ; Ll # LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Ll # [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; Ll # LATIN SMALL LETTER ENG +014D ; Ll # LATIN SMALL LETTER O WITH MACRON +014F ; Ll # LATIN SMALL LETTER O WITH BREVE +0151 ; Ll # LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Ll # LATIN SMALL LIGATURE OE +0155 ; Ll # LATIN SMALL LETTER R WITH ACUTE +0157 ; Ll # LATIN SMALL LETTER R WITH CEDILLA +0159 ; Ll # LATIN SMALL LETTER R WITH CARON +015B ; Ll # LATIN SMALL LETTER S WITH ACUTE +015D ; Ll # LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Ll # LATIN SMALL LETTER S WITH CEDILLA +0161 ; Ll # LATIN SMALL LETTER S WITH CARON +0163 ; Ll # LATIN SMALL LETTER T WITH CEDILLA +0165 ; Ll # LATIN SMALL LETTER T WITH CARON +0167 ; Ll # LATIN SMALL LETTER T WITH STROKE +0169 ; Ll # LATIN SMALL LETTER U WITH TILDE +016B ; Ll # LATIN SMALL LETTER U WITH MACRON +016D ; Ll # LATIN SMALL LETTER U WITH BREVE +016F ; Ll # LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Ll # LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Ll # LATIN SMALL LETTER U WITH OGONEK +0175 ; Ll # LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Ll # LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Ll # LATIN SMALL LETTER Z WITH ACUTE +017C ; Ll # LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Ll # [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Ll # LATIN SMALL LETTER B WITH 
TOPBAR +0185 ; Ll # LATIN SMALL LETTER TONE SIX +0188 ; Ll # LATIN SMALL LETTER C WITH HOOK +018C..018D ; Ll # [2] LATIN SMALL LETTER D WITH TOPBAR..LATIN SMALL LETTER TURNED DELTA +0192 ; Ll # LATIN SMALL LETTER F WITH HOOK +0195 ; Ll # LATIN SMALL LETTER HV +0199..019B ; Ll # [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE +019E ; Ll # LATIN SMALL LETTER N WITH LONG RIGHT LEG +01A1 ; Ll # LATIN SMALL LETTER O WITH HORN +01A3 ; Ll # LATIN SMALL LETTER OI +01A5 ; Ll # LATIN SMALL LETTER P WITH HOOK +01A8 ; Ll # LATIN SMALL LETTER TONE TWO +01AA..01AB ; Ll # [2] LATIN LETTER REVERSED ESH LOOP..LATIN SMALL LETTER T WITH PALATAL HOOK +01AD ; Ll # LATIN SMALL LETTER T WITH HOOK +01B0 ; Ll # LATIN SMALL LETTER U WITH HORN +01B4 ; Ll # LATIN SMALL LETTER Y WITH HOOK +01B6 ; Ll # LATIN SMALL LETTER Z WITH STROKE +01B9..01BA ; Ll # [2] LATIN SMALL LETTER EZH REVERSED..LATIN SMALL LETTER EZH WITH TAIL +01BD..01BF ; Ll # [3] LATIN SMALL LETTER TONE FIVE..LATIN LETTER WYNN +01C6 ; Ll # LATIN SMALL LETTER DZ WITH CARON +01C9 ; Ll # LATIN SMALL LETTER LJ +01CC ; Ll # LATIN SMALL LETTER NJ +01CE ; Ll # LATIN SMALL LETTER A WITH CARON +01D0 ; Ll # LATIN SMALL LETTER I WITH CARON +01D2 ; Ll # LATIN SMALL LETTER O WITH CARON +01D4 ; Ll # LATIN SMALL LETTER U WITH CARON +01D6 ; Ll # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Ll # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Ll # LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DC..01DD ; Ll # [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Ll # LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Ll # LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Ll # LATIN SMALL LETTER AE WITH MACRON +01E5 ; Ll # LATIN SMALL LETTER G WITH STROKE +01E7 ; Ll # LATIN SMALL LETTER G WITH CARON +01E9 ; Ll # LATIN SMALL LETTER K WITH CARON +01EB ; Ll # LATIN SMALL LETTER O WITH OGONEK +01ED ; Ll # LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F0 ; Ll # 
[2] LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON +01F3 ; Ll # LATIN SMALL LETTER DZ +01F5 ; Ll # LATIN SMALL LETTER G WITH ACUTE +01F9 ; Ll # LATIN SMALL LETTER N WITH GRAVE +01FB ; Ll # LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Ll # LATIN SMALL LETTER AE WITH ACUTE +01FF ; Ll # LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Ll # LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Ll # LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Ll # LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Ll # LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Ll # LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Ll # LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Ll # LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Ll # LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Ll # LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Ll # LATIN SMALL LETTER R WITH INVERTED BREVE +0215 ; Ll # LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Ll # LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Ll # LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Ll # LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Ll # LATIN SMALL LETTER YOGH +021F ; Ll # LATIN SMALL LETTER H WITH CARON +0221 ; Ll # LATIN SMALL LETTER D WITH CURL +0223 ; Ll # LATIN SMALL LETTER OU +0225 ; Ll # LATIN SMALL LETTER Z WITH HOOK +0227 ; Ll # LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Ll # LATIN SMALL LETTER E WITH CEDILLA +022B ; Ll # LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; Ll # LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Ll # LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Ll # LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233..0239 ; Ll # [7] LATIN SMALL LETTER Y WITH MACRON..LATIN SMALL LETTER QP DIGRAPH +023C ; Ll # LATIN SMALL LETTER C WITH STROKE +023F..0240 ; Ll # [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Ll # LATIN SMALL LETTER GLOTTAL STOP +0247 ; Ll # LATIN SMALL LETTER E WITH STROKE +0249 ; Ll # LATIN SMALL LETTER J WITH STROKE +024B ; 
Ll # LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Ll # LATIN SMALL LETTER R WITH STROKE +024F..0293 ; Ll # [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL +0295..02AF ; Ll # [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +0371 ; Ll # GREEK SMALL LETTER HETA +0373 ; Ll # GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Ll # GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Ll # [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Ll # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Ll # [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Ll # [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Ll # [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Ll # GREEK SMALL LETTER ARCHAIC KOPPA +03DB ; Ll # GREEK SMALL LETTER STIGMA +03DD ; Ll # GREEK SMALL LETTER DIGAMMA +03DF ; Ll # GREEK SMALL LETTER KOPPA +03E1 ; Ll # GREEK SMALL LETTER SAMPI +03E3 ; Ll # COPTIC SMALL LETTER SHEI +03E5 ; Ll # COPTIC SMALL LETTER FEI +03E7 ; Ll # COPTIC SMALL LETTER KHEI +03E9 ; Ll # COPTIC SMALL LETTER HORI +03EB ; Ll # COPTIC SMALL LETTER GANGIA +03ED ; Ll # COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Ll # [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Ll # GREEK LUNATE EPSILON SYMBOL +03F8 ; Ll # GREEK SMALL LETTER SHO +03FB..03FC ; Ll # [2] GREEK SMALL LETTER SAN..GREEK RHO WITH STROKE SYMBOL +0430..045F ; Ll # [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Ll # CYRILLIC SMALL LETTER OMEGA +0463 ; Ll # CYRILLIC SMALL LETTER YAT +0465 ; Ll # CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Ll # CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Ll # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Ll # CYRILLIC SMALL LETTER BIG YUS +046D ; Ll # CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Ll # CYRILLIC SMALL LETTER KSI +0471 ; Ll # CYRILLIC SMALL LETTER PSI +0473 ; Ll # CYRILLIC SMALL LETTER FITA +0475 ; Ll # 
CYRILLIC SMALL LETTER IZHITSA +0477 ; Ll # CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Ll # CYRILLIC SMALL LETTER UK +047B ; Ll # CYRILLIC SMALL LETTER ROUND OMEGA +047D ; Ll # CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Ll # CYRILLIC SMALL LETTER OT +0481 ; Ll # CYRILLIC SMALL LETTER KOPPA +048B ; Ll # CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Ll # CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Ll # CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Ll # CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Ll # CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Ll # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Ll # CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Ll # CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Ll # CYRILLIC SMALL LETTER KA WITH DESCENDER +049D ; Ll # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Ll # CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Ll # CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Ll # CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Ll # CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Ll # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Ll # CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Ll # CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Ll # CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Ll # CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Ll # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Ll # CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Ll # CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Ll # CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Ll # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +04BB ; Ll # CYRILLIC SMALL LETTER SHHA +04BD ; Ll # CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Ll # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Ll # CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Ll # CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Ll # CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Ll # CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Ll # CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Ll # CYRILLIC SMALL LETTER KHAKASSIAN 
CHE +04CE..04CF ; Ll # [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Ll # CYRILLIC SMALL LETTER A WITH BREVE +04D3 ; Ll # CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Ll # CYRILLIC SMALL LIGATURE A IE +04D7 ; Ll # CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Ll # CYRILLIC SMALL LETTER SCHWA +04DB ; Ll # CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Ll # CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; Ll # CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Ll # CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Ll # CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Ll # CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Ll # CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Ll # CYRILLIC SMALL LETTER BARRED O +04EB ; Ll # CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Ll # CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Ll # CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; Ll # CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Ll # CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Ll # CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Ll # CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Ll # CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Ll # CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Ll # CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Ll # CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Ll # CYRILLIC SMALL LETTER KOMI DE +0503 ; Ll # CYRILLIC SMALL LETTER KOMI DJE +0505 ; Ll # CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Ll # CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Ll # CYRILLIC SMALL LETTER KOMI LJE +050B ; Ll # CYRILLIC SMALL LETTER KOMI NJE +050D ; Ll # CYRILLIC SMALL LETTER KOMI SJE +050F ; Ll # CYRILLIC SMALL LETTER KOMI TJE +0511 ; Ll # CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Ll # CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Ll # CYRILLIC SMALL LETTER LHA +0517 ; Ll # CYRILLIC SMALL LETTER RHA +0519 ; Ll # CYRILLIC SMALL LETTER YAE +051B ; Ll # CYRILLIC SMALL LETTER QA +051D ; Ll # CYRILLIC SMALL LETTER WE +051F ; Ll # CYRILLIC SMALL LETTER 
ALEUT KA +0521 ; Ll # CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Ll # CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0525 ; Ll # CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Ll # CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 ; Ll # CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Ll # CYRILLIC SMALL LETTER DZZHE +052D ; Ll # CYRILLIC SMALL LETTER DCHE +052F ; Ll # CYRILLIC SMALL LETTER EL WITH DESCENDER +0560..0588 ; Ll # [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +10D0..10FA ; Ll # [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Ll # [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13F8..13FD ; Ll # [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Ll # [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Ll # CYRILLIC SMALL LETTER TJE +1D00..1D2B ; Ll # [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D6B..1D77 ; Ll # [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D79..1D9A ; Ll # [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1E01 ; Ll # LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Ll # LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Ll # LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Ll # LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Ll # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Ll # LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Ll # LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Ll # LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Ll # LATIN SMALL LETTER D WITH CEDILLA +1E13 ; Ll # LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Ll # LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Ll # LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Ll # LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Ll # LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Ll # LATIN SMALL LETTER E WITH CEDILLA AND BREVE +1E1F ; Ll # LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Ll # LATIN SMALL LETTER G WITH MACRON +1E23 ; Ll # 
LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Ll # LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Ll # LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Ll # LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Ll # LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Ll # LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Ll # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE +1E31 ; Ll # LATIN SMALL LETTER K WITH ACUTE +1E33 ; Ll # LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Ll # LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Ll # LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Ll # LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Ll # LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Ll # LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Ll # LATIN SMALL LETTER M WITH ACUTE +1E41 ; Ll # LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Ll # LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Ll # LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Ll # LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Ll # LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Ll # LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Ll # LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Ll # LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Ll # LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; Ll # LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Ll # LATIN SMALL LETTER P WITH ACUTE +1E57 ; Ll # LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Ll # LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Ll # LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Ll # LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Ll # LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Ll # LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Ll # LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; Ll # LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Ll # LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Ll # LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Ll # LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Ll # LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Ll # LATIN SMALL LETTER T WITH LINE 
BELOW +1E71 ; Ll # LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Ll # LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Ll # LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Ll # LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Ll # LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Ll # LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Ll # LATIN SMALL LETTER V WITH TILDE +1E7F ; Ll # LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Ll # LATIN SMALL LETTER W WITH GRAVE +1E83 ; Ll # LATIN SMALL LETTER W WITH ACUTE +1E85 ; Ll # LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Ll # LATIN SMALL LETTER W WITH DOT ABOVE +1E89 ; Ll # LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Ll # LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Ll # LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Ll # LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Ll # LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Ll # LATIN SMALL LETTER Z WITH DOT BELOW +1E95..1E9D ; Ll # [9] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH HIGH STROKE +1E9F ; Ll # LATIN SMALL LETTER DELTA +1EA1 ; Ll # LATIN SMALL LETTER A WITH DOT BELOW +1EA3 ; Ll # LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Ll # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Ll # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Ll # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Ll # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Ll # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Ll # LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Ll # LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Ll # LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Ll # LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Ll # LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Ll # LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Ll # LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Ll # LATIN SMALL LETTER E WITH TILDE +1EBF ; Ll # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Ll # LATIN SMALL LETTER E WITH 
CIRCUMFLEX AND GRAVE +1EC3 ; Ll # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Ll # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Ll # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Ll # LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Ll # LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Ll # LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Ll # LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Ll # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Ll # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED5 ; Ll # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Ll # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Ll # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Ll # LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Ll # LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Ll # LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Ll # LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Ll # LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Ll # LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Ll # LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Ll # LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Ll # LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Ll # LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Ll # LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Ll # LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Ll # LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Ll # LATIN SMALL LETTER Y WITH DOT BELOW +1EF7 ; Ll # LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Ll # LATIN SMALL LETTER Y WITH TILDE +1EFB ; Ll # LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Ll # LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Ll # [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Ll # [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Ll # [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Ll 
# [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Ll # [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Ll # [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Ll # [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Ll # [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1F87 ; Ll # [8] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F90..1F97 ; Ll # [8] GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA0..1FA7 ; Ll # [8] GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FB0..1FB4 ; Ll # [5] GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Ll # [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBE ; Ll # GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Ll # [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Ll # [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FD0..1FD3 ; Ll # [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Ll # [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Ll # [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; Ll # [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Ll # [2] GREEK 
SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +210A ; Ll # SCRIPT SMALL G +210E..210F ; Ll # [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI +2113 ; Ll # SCRIPT SMALL L +212F ; Ll # SCRIPT SMALL E +2134 ; Ll # SCRIPT SMALL O +2139 ; Ll # INFORMATION SOURCE +213C..213D ; Ll # [2] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK SMALL GAMMA +2146..2149 ; Ll # [4] DOUBLE-STRUCK ITALIC SMALL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Ll # TURNED SMALL F +2184 ; Ll # LATIN SMALL LETTER REVERSED C +2C30..2C5F ; Ll # [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Ll # LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Ll # [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Ll # LATIN SMALL LETTER H WITH DESCENDER +2C6A ; Ll # LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Ll # LATIN SMALL LETTER Z WITH DESCENDER +2C71 ; Ll # LATIN SMALL LETTER V WITH RIGHT HOOK +2C73..2C74 ; Ll # [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL +2C76..2C7B ; Ll # [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C81 ; Ll # COPTIC SMALL LETTER ALFA +2C83 ; Ll # COPTIC SMALL LETTER VIDA +2C85 ; Ll # COPTIC SMALL LETTER GAMMA +2C87 ; Ll # COPTIC SMALL LETTER DALDA +2C89 ; Ll # COPTIC SMALL LETTER EIE +2C8B ; Ll # COPTIC SMALL LETTER SOU +2C8D ; Ll # COPTIC SMALL LETTER ZATA +2C8F ; Ll # COPTIC SMALL LETTER HATE +2C91 ; Ll # COPTIC SMALL LETTER THETHE +2C93 ; Ll # COPTIC SMALL LETTER IAUDA +2C95 ; Ll # COPTIC SMALL LETTER KAPA +2C97 ; Ll # COPTIC SMALL LETTER LAULA +2C99 ; Ll # COPTIC SMALL LETTER MI +2C9B ; Ll # COPTIC SMALL LETTER NI +2C9D ; Ll # COPTIC SMALL LETTER KSI +2C9F ; Ll # COPTIC SMALL LETTER O +2CA1 ; Ll # COPTIC SMALL LETTER PI +2CA3 ; Ll # COPTIC SMALL LETTER RO +2CA5 ; Ll # COPTIC SMALL LETTER SIMA +2CA7 ; Ll # COPTIC SMALL LETTER TAU +2CA9 ; Ll # COPTIC SMALL LETTER UA +2CAB ; Ll # COPTIC SMALL LETTER FI +2CAD ; Ll # COPTIC SMALL 
LETTER KHI +2CAF ; Ll # COPTIC SMALL LETTER PSI +2CB1 ; Ll # COPTIC SMALL LETTER OOU +2CB3 ; Ll # COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Ll # COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB9 ; Ll # COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Ll # COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Ll # COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Ll # COPTIC SMALL LETTER SAMPI +2CC3 ; Ll # COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Ll # COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Ll # COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Ll # COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Ll # COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Ll # COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Ll # COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; Ll # COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Ll # COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Ll # COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Ll # COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Ll # COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Ll # COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDD ; Ll # COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Ll # COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Ll # COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3..2CE4 ; Ll # [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI +2CEC ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Ll # COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Ll # [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Ll # GEORGIAN SMALL LETTER YN +2D2D ; Ll # GEORGIAN SMALL LETTER AEN +A641 ; Ll # CYRILLIC SMALL LETTER ZEMLYA +A643 ; Ll # CYRILLIC SMALL LETTER DZELO +A645 ; Ll # CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Ll # CYRILLIC SMALL LETTER IOTA +A649 ; Ll # CYRILLIC SMALL LETTER DJERV +A64B ; Ll # CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Ll # CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Ll # CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Ll # CYRILLIC SMALL 
LETTER YERU WITH BACK YER +A653 ; Ll # CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Ll # CYRILLIC SMALL LETTER REVERSED YU +A657 ; Ll # CYRILLIC SMALL LETTER IOTIFIED A +A659 ; Ll # CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Ll # CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Ll # CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Ll # CYRILLIC SMALL LETTER YN +A661 ; Ll # CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Ll # CYRILLIC SMALL LETTER SOFT DE +A665 ; Ll # CYRILLIC SMALL LETTER SOFT EL +A667 ; Ll # CYRILLIC SMALL LETTER SOFT EM +A669 ; Ll # CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Ll # CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Ll # CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Ll # CYRILLIC SMALL LETTER DWE +A683 ; Ll # CYRILLIC SMALL LETTER DZWE +A685 ; Ll # CYRILLIC SMALL LETTER ZHWE +A687 ; Ll # CYRILLIC SMALL LETTER CCHE +A689 ; Ll # CYRILLIC SMALL LETTER DZZE +A68B ; Ll # CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Ll # CYRILLIC SMALL LETTER TWE +A68F ; Ll # CYRILLIC SMALL LETTER TSWE +A691 ; Ll # CYRILLIC SMALL LETTER TSSE +A693 ; Ll # CYRILLIC SMALL LETTER TCHE +A695 ; Ll # CYRILLIC SMALL LETTER HWE +A697 ; Ll # CYRILLIC SMALL LETTER SHWE +A699 ; Ll # CYRILLIC SMALL LETTER DOUBLE O +A69B ; Ll # CYRILLIC SMALL LETTER CROSSED O +A723 ; Ll # LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Ll # LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Ll # LATIN SMALL LETTER HENG +A729 ; Ll # LATIN SMALL LETTER TZ +A72B ; Ll # LATIN SMALL LETTER TRESILLO +A72D ; Ll # LATIN SMALL LETTER CUATRILLO +A72F..A731 ; Ll # [3] LATIN SMALL LETTER CUATRILLO WITH COMMA..LATIN LETTER SMALL CAPITAL S +A733 ; Ll # LATIN SMALL LETTER AA +A735 ; Ll # LATIN SMALL LETTER AO +A737 ; Ll # LATIN SMALL LETTER AU +A739 ; Ll # LATIN SMALL LETTER AV +A73B ; Ll # LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Ll # LATIN SMALL LETTER AY +A73F ; Ll # LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Ll # LATIN SMALL LETTER K WITH STROKE +A743 ; Ll # LATIN SMALL LETTER K WITH DIAGONAL 
STROKE +A745 ; Ll # LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Ll # LATIN SMALL LETTER BROKEN L +A749 ; Ll # LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Ll # LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; Ll # LATIN SMALL LETTER O WITH LOOP +A74F ; Ll # LATIN SMALL LETTER OO +A751 ; Ll # LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Ll # LATIN SMALL LETTER P WITH FLOURISH +A755 ; Ll # LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Ll # LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Ll # LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Ll # LATIN SMALL LETTER R ROTUNDA +A75D ; Ll # LATIN SMALL LETTER RUM ROTUNDA +A75F ; Ll # LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Ll # LATIN SMALL LETTER VY +A763 ; Ll # LATIN SMALL LETTER VISIGOTHIC Z +A765 ; Ll # LATIN SMALL LETTER THORN WITH STROKE +A767 ; Ll # LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Ll # LATIN SMALL LETTER VEND +A76B ; Ll # LATIN SMALL LETTER ET +A76D ; Ll # LATIN SMALL LETTER IS +A76F ; Ll # LATIN SMALL LETTER CON +A771..A778 ; Ll # [8] LATIN SMALL LETTER DUM..LATIN SMALL LETTER UM +A77A ; Ll # LATIN SMALL LETTER INSULAR D +A77C ; Ll # LATIN SMALL LETTER INSULAR F +A77F ; Ll # LATIN SMALL LETTER TURNED INSULAR G +A781 ; Ll # LATIN SMALL LETTER TURNED L +A783 ; Ll # LATIN SMALL LETTER INSULAR R +A785 ; Ll # LATIN SMALL LETTER INSULAR S +A787 ; Ll # LATIN SMALL LETTER INSULAR T +A78C ; Ll # LATIN SMALL LETTER SALTILLO +A78E ; Ll # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A791 ; Ll # LATIN SMALL LETTER N WITH DESCENDER +A793..A795 ; Ll # [3] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER H WITH PALATAL HOOK +A797 ; Ll # LATIN SMALL LETTER B WITH FLOURISH +A799 ; Ll # LATIN SMALL LETTER F WITH STROKE +A79B ; Ll # LATIN SMALL LETTER VOLAPUK AE +A79D ; Ll # LATIN SMALL LETTER VOLAPUK OE +A79F ; Ll # LATIN SMALL LETTER VOLAPUK UE +A7A1 ; Ll # LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Ll # LATIN SMALL LETTER K WITH 
OBLIQUE STROKE +A7A5 ; Ll # LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Ll # LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Ll # LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AF ; Ll # LATIN LETTER SMALL CAPITAL Q +A7B5 ; Ll # LATIN SMALL LETTER BETA +A7B7 ; Ll # LATIN SMALL LETTER OMEGA +A7B9 ; Ll # LATIN SMALL LETTER U WITH STROKE +A7BB ; Ll # LATIN SMALL LETTER GLOTTAL A +A7BD ; Ll # LATIN SMALL LETTER GLOTTAL I +A7BF ; Ll # LATIN SMALL LETTER GLOTTAL U +A7C1 ; Ll # LATIN SMALL LETTER OLD POLISH O +A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W +A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7CD ; Ll # LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7D1 ; Ll # LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Ll # LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Ll # LATIN SMALL LETTER DOUBLE WYNN +A7D7 ; Ll # LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Ll # LATIN SMALL LETTER SIGMOID S +A7DB ; Ll # LATIN SMALL LETTER LAMBDA +A7F6 ; Ll # LATIN SMALL LETTER REVERSED HALF H +A7FA ; Ll # LATIN LETTER SMALL CAPITAL TURNED M +AB30..AB5A ; Ll # [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB60..AB68 ; Ll # [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB70..ABBF ; Ll # [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Ll # [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Ll # [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Ll # [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Ll # [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Ll # [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Ll # [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Ll # [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 
+105BB..105BC ; Ll # [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10CC0..10CF2 ; Ll # [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Ll # [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +118C0..118DF ; Ll # [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Ll # [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1D41A..1D433 ; Ll # [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z +1D44E..1D454 ; Ll # [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G +1D456..1D467 ; Ll # [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z +1D482..1D49B ; Ll # [26] MATHEMATICAL BOLD ITALIC SMALL A..MATHEMATICAL BOLD ITALIC SMALL Z +1D4B6..1D4B9 ; Ll # [4] MATHEMATICAL SCRIPT SMALL A..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Ll # MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Ll # [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D4CF ; Ll # [11] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL SCRIPT SMALL Z +1D4EA..1D503 ; Ll # [26] MATHEMATICAL BOLD SCRIPT SMALL A..MATHEMATICAL BOLD SCRIPT SMALL Z +1D51E..1D537 ; Ll # [26] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL FRAKTUR SMALL Z +1D552..1D56B ; Ll # [26] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL DOUBLE-STRUCK SMALL Z +1D586..1D59F ; Ll # [26] MATHEMATICAL BOLD FRAKTUR SMALL A..MATHEMATICAL BOLD FRAKTUR SMALL Z +1D5BA..1D5D3 ; Ll # [26] MATHEMATICAL SANS-SERIF SMALL A..MATHEMATICAL SANS-SERIF SMALL Z +1D5EE..1D607 ; Ll # [26] MATHEMATICAL SANS-SERIF BOLD SMALL A..MATHEMATICAL SANS-SERIF BOLD SMALL Z +1D622..1D63B ; Ll # [26] MATHEMATICAL SANS-SERIF ITALIC SMALL A..MATHEMATICAL SANS-SERIF ITALIC SMALL Z +1D656..1D66F ; Ll # [26] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL A..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Z +1D68A..1D6A5 ; Ll # [28] MATHEMATICAL MONOSPACE SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6C2..1D6DA ; Ll # [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA 
+1D6DC..1D6E1 ; Ll # [6] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL BOLD PI SYMBOL +1D6FC..1D714 ; Ll # [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D71B ; Ll # [6] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL ITALIC PI SYMBOL +1D736..1D74E ; Ll # [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D755 ; Ll # [6] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC PI SYMBOL +1D770..1D788 ; Ll # [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D78F ; Ll # [6] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD PI SYMBOL +1D7AA..1D7C2 ; Ll # [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7C9 ; Ll # [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL +1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Ll # [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0B..1DF1E ; Ll # [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 2258 + +# ================================================ + +# General_Category=Titlecase_Letter + +01C5 ; Lt # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C8 ; Lt # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CB ; Lt # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01F2 ; Lt # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +1F88..1F8F ; Lt # [8] GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F98..1F9F ; Lt # [8] GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL 
LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA8..1FAF ; Lt # [8] GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FBC ; Lt # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FCC ; Lt # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FFC ; Lt # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + +# Total code points: 31 + +# ================================================ + +# General_Category=Modifier_Letter + +02B0..02C1 ; Lm # [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; Lm # [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; Lm # [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; Lm # MODIFIER LETTER VOICING +02EE ; Lm # MODIFIER LETTER DOUBLE APOSTROPHE +0374 ; Lm # GREEK NUMERAL SIGN +037A ; Lm # GREEK YPOGEGRAMMENI +0559 ; Lm # ARMENIAN MODIFIER LETTER LEFT HALF RING +0640 ; Lm # ARABIC TATWEEL +06E5..06E6 ; Lm # [2] ARABIC SMALL WAW..ARABIC SMALL YEH +07F4..07F5 ; Lm # [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; Lm # NKO LAJANYALAN +081A ; Lm # SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; Lm # SAMARITAN MODIFIER LETTER SHORT A +0828 ; Lm # SAMARITAN MODIFIER LETTER I +08C9 ; Lm # ARABIC SMALL FARSI YEH +0971 ; Lm # DEVANAGARI SIGN HIGH SPACING DOT +0E46 ; Lm # THAI CHARACTER MAIYAMOK +0EC6 ; Lm # LAO KO LA +10FC ; Lm # MODIFIER LETTER GEORGIAN NAR +17D7 ; Lm # KHMER SIGN LEK TOO +1843 ; Lm # MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Lm # TAI THAM SIGN MAI YAMOK +1C78..1C7D ; Lm # [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1D2C..1D6A ; Lm # [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Lm # MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Lm # [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Lm # SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lm # SUPERSCRIPT LATIN SMALL LETTER N +2090..209C 
; Lm # [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2C7C..2C7D ; Lm # [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2D6F ; Lm # TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2E2F ; Lm # VERTICAL TILDE +3005 ; Lm # IDEOGRAPHIC ITERATION MARK +3031..3035 ; Lm # [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +303B ; Lm # VERTICAL IDEOGRAPHIC ITERATION MARK +309D..309E ; Lm # [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Lm # [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Lm # YI SYLLABLE WU +A4F8..A4FD ; Lm # [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A60C ; Lm # VAI SYLLABLE LENGTHENER +A67F ; Lm # CYRILLIC PAYEROK +A69C..A69D ; Lm # [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A717..A71F ; Lm # [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A770 ; Lm # MODIFIER LETTER US +A788 ; Lm # MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F2..A7F4 ; Lm # [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F8..A7F9 ; Lm # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A9CF ; Lm # JAVANESE PANGRANGKEP +A9E6 ; Lm # MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Lm # MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Lm # TAI VIET SYMBOL SAM +AAF3..AAF4 ; Lm # [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AB5C..AB5F ; Lm # [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Lm # MODIFIER LETTER SMALL TURNED W +FF70 ; Lm # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +10780..10785 ; Lm # [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Lm # [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Lm # 
[9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10D4E ; Lm # GARAY VOWEL LENGTH MARK +10D6F ; Lm # GARAY REDUPLICATION MARK +16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16D40..16D42 ; Lm # [3] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN VISARGA +16D6B..16D6C ; Lm # [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Lm # [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Lm # OLD CHINESE ITERATION MARK +1AFF0..1AFF3 ; Lm # [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Lm # [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Lm # [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1E030..1E06D ; Lm # [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E4EB ; Lm # NAG MUNDARI SIGN OJOD +1E94B ; Lm # ADLAM NASALIZATION MARK + +# Total code points: 404 + +# ================================================ + +# General_Category=Other_Letter + +00AA ; Lo # FEMININE ORDINAL INDICATOR +00BA ; Lo # MASCULINE ORDINAL INDICATOR +01BB ; Lo # LATIN LETTER TWO WITH STROKE +01C0..01C3 ; Lo # [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +0294 ; Lo # LATIN LETTER GLOTTAL STOP +05D0..05EA ; Lo # [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Lo # [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0620..063F ; Lo # [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0641..064A ; Lo # [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; Lo # [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; Lo # [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; Lo # ARABIC LETTER AE 
+06EE..06EF ; Lo # [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; Lo # [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; Lo # ARABIC LETTER HEH WITH INVERTED V +0710 ; Lo # SYRIAC LETTER ALAPH +0712..072F ; Lo # [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; Lo # [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; Lo # THAANA LETTER NAA +07CA..07EA ; Lo # [33] NKO LETTER A..NKO LETTER JONA RA +0800..0815 ; Lo # [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; Lo # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; Lo # [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; Lo # [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; Lo # [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; Lo # DEVANAGARI SIGN AVAGRAHA +0950 ; Lo # DEVANAGARI OM +0958..0961 ; Lo # [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0972..0980 ; Lo # [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0985..098C ; Lo # [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Lo # [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Lo # [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Lo # [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Lo # BENGALI LETTER LA +09B6..09B9 ; Lo # [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; Lo # BENGALI SIGN AVAGRAHA +09CE ; Lo # BENGALI LETTER KHANDA TA +09DC..09DD ; Lo # [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Lo # [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; Lo # [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; Lo # BENGALI LETTER VEDIC ANUSVARA +0A05..0A0A ; Lo # [6] GURMUKHI LETTER A..GURMUKHI LETTER UU 
+0A0F..0A10 ; Lo # [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Lo # [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Lo # [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Lo # [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Lo # [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Lo # [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A59..0A5C ; Lo # [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Lo # GURMUKHI LETTER FA +0A72..0A74 ; Lo # [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A85..0A8D ; Lo # [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Lo # [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Lo # [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Lo # [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Lo # [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Lo # [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; Lo # GUJARATI SIGN AVAGRAHA +0AD0 ; Lo # GUJARATI OM +0AE0..0AE1 ; Lo # [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF9 ; Lo # GUJARATI LETTER ZHA +0B05..0B0C ; Lo # [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Lo # [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Lo # [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Lo # [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Lo # [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Lo # [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; Lo # ORIYA SIGN AVAGRAHA +0B5C..0B5D ; Lo # [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Lo # [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; Lo # ORIYA LETTER WA +0B83 ; Lo # TAMIL SIGN VISARGA +0B85..0B8A ; Lo # [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Lo # [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Lo # [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Lo # [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Lo # TAMIL LETTER JA +0B9E..0B9F ; Lo # [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Lo # [2] TAMIL LETTER NNA..TAMIL 
LETTER TA +0BA8..0BAA ; Lo # [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Lo # [12] TAMIL LETTER MA..TAMIL LETTER HA +0BD0 ; Lo # TAMIL OM +0C05..0C0C ; Lo # [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Lo # [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Lo # [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; Lo # [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; Lo # TELUGU SIGN AVAGRAHA +0C58..0C5A ; Lo # [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; Lo # TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; Lo # [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; Lo # KANNADA SIGN SPACING CANDRABINDU +0C85..0C8C ; Lo # [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Lo # [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Lo # [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Lo # [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Lo # [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; Lo # KANNADA SIGN AVAGRAHA +0CDD..0CDE ; Lo # [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Lo # [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; Lo # [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; Lo # MALAYALAM SIGN AVAGRAHA +0D4E ; Lo # MALAYALAM LETTER DOT REPH +0D54..0D56 ; Lo # [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D5F..0D61 ; Lo # [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D7A..0D7F ; Lo # [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D85..0D96 ; Lo # [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Lo # [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Lo # [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Lo # SINHALA LETTER DANTAJA 
LAYANNA +0DC0..0DC6 ; Lo # [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0E01..0E30 ; Lo # [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; Lo # [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E40..0E45 ; Lo # [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E81..0E82 ; Lo # [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Lo # LAO LETTER KHO TAM +0E86..0E8A ; Lo # [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; Lo # [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; Lo # LAO LETTER LO LOOT +0EA7..0EB0 ; Lo # [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; Lo # [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; Lo # LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Lo # [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EDC..0EDF ; Lo # [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; Lo # TIBETAN SYLLABLE OM +0F40..0F47 ; Lo # [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Lo # [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F88..0F8C ; Lo # [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +1000..102A ; Lo # [43] MYANMAR LETTER KA..MYANMAR LETTER AU +103F ; Lo # MYANMAR LETTER GREAT SA +1050..1055 ; Lo # [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +105A..105D ; Lo # [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; Lo # MYANMAR LETTER SGAW KAREN SHA +1065..1066 ; Lo # [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +106E..1070 ; Lo # [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; Lo # [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +108E ; Lo # MYANMAR LETTER RUMAI PALAUNG FA +1100..1248 ; Lo # [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; Lo # [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Lo # [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Lo # ETHIOPIC SYLLABLE QHWA +125A..125D ; Lo # [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Lo # [41] ETHIOPIC SYLLABLE BA..ETHIOPIC 
SYLLABLE XWA +128A..128D ; Lo # [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Lo # [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Lo # [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Lo # [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Lo # ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Lo # [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Lo # [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Lo # [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Lo # [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Lo # [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; Lo # [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1401..166C ; Lo # [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; Lo # [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; Lo # [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; Lo # [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16F1..16F8 ; Lo # [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; Lo # [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; Lo # [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1740..1751 ; Lo # [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; Lo # [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Lo # [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; Lo # [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17DC ; Lo # KHMER SIGN AVAKRAHASANYA +1820..1842 ; Lo # [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1844..1878 ; Lo # [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; Lo # [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; Lo # [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; Lo # MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; Lo # [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER 
DENTAL S +1900..191E ; Lo # [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1950..196D ; Lo # [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Lo # [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; Lo # [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; Lo # [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; Lo # [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A20..1A54 ; Lo # [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1B05..1B33 ; Lo # [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B45..1B4C ; Lo # [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B83..1BA0 ; Lo # [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BAE..1BAF ; Lo # [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; Lo # [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1C00..1C23 ; Lo # [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C4D..1C4F ; Lo # [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; Lo # [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1CE9..1CEC ; Lo # [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; Lo # [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Lo # [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; Lo # VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +2135..2138 ; Lo # [4] ALEF SYMBOL..DALET SYMBOL +2D30..2D67 ; Lo # [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D80..2D96 ; Lo # [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Lo # [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Lo # [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Lo # [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Lo # [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Lo # [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Lo # [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Lo # [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO 
+2DD8..2DDE ; Lo # [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3006 ; Lo # IDEOGRAPHIC CLOSING MARK +303C ; Lo # MASU MARK +3041..3096 ; Lo # [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309F ; Lo # HIRAGANA DIGRAPH YORI +30A1..30FA ; Lo # [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FF ; Lo # KATAKANA DIGRAPH KOTO +3105..312F ; Lo # [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; Lo # [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; Lo # [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; Lo # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; Lo # [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; Lo # [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A016..A48C ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; Lo # [40] LISU LETTER BA..LISU LETTER OE +A500..A60B ; Lo # [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A610..A61F ; Lo # [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; Lo # [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A66E ; Lo # CYRILLIC LETTER MULTIOCULAR O +A6A0..A6E5 ; Lo # [70] BAMUM LETTER A..BAMUM LETTER KI +A78F ; Lo # LATIN LETTER SINOLOGICAL DOT +A7F7 ; Lo # LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7FB..A801 ; Lo # [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; Lo # [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; Lo # [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; Lo # [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A840..A873 ; Lo # [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A882..A8B3 ; Lo # [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8F2..A8F7 ; Lo # [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; Lo # DEVANAGARI HEADSTROKE +A8FD..A8FE ; Lo # [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A90A..A925 ; Lo # [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A930..A946 ; Lo # [23] REJANG LETTER KA..REJANG LETTER A 
+A960..A97C ; Lo # [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A984..A9B2 ; Lo # [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9E0..A9E4 ; Lo # [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E7..A9EF ; Lo # [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; Lo # [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; Lo # [41] CHAM LETTER A..CHAM LETTER HA +AA40..AA42 ; Lo # [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; Lo # [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA60..AA6F ; Lo # [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA71..AA76 ; Lo # [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; Lo # MYANMAR LETTER AITON RA +AA7E..AAAF ; Lo # [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; Lo # TAI VIET VOWEL AA +AAB5..AAB6 ; Lo # [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; Lo # [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; Lo # TAI VIET TONE MAI NUENG +AAC2 ; Lo # TAI VIET TONE MAI SONG +AADB..AADC ; Lo # [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AAE0..AAEA ; Lo # [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; Lo # MEETEI MAYEK ANJI +AB01..AB06 ; Lo # [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; Lo # [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; Lo # [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; Lo # [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; Lo # [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +ABC0..ABE2 ; Lo # [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +AC00..D7A3 ; Lo # [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Lo # [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Lo # [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; Lo # [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Lo # [106] 
CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB1D ; Lo # HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; Lo # [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; Lo # [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Lo # [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Lo # HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Lo # [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Lo # [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; Lo # [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; Lo # [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; Lo # [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Lo # [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; Lo # [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; Lo # [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Lo # [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF66..FF6F ; Lo # [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF71..FF9D ; Lo # [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FFA0..FFBE ; Lo # [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Lo # [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Lo # [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Lo # [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; Lo # [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 
JE +1000D..10026 ; Lo # [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Lo # [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Lo # [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Lo # [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; Lo # [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Lo # [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10280..1029C ; Lo # [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; Lo # [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; Lo # [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; Lo # [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10342..10349 ; Lo # [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +10350..10375 ; Lo # [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; Lo # [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; Lo # [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Lo # [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +10450..1049D ; Lo # [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +10500..10527 ; Lo # [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; Lo # [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +105C0..105F3 ; Lo # [52] TODHRI LETTER A..TODHRI LETTER OO +10600..10736 ; Lo # [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; Lo # [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; Lo # [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10800..10805 ; Lo # [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Lo # CYPRIOT SYLLABLE JO +1080A..10835 ; Lo # [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Lo # [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Lo # CYPRIOT SYLLABLE ZA +1083F..10855 ; Lo # [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; Lo # [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; Lo # [31] 
NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; Lo # [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; Lo # [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; Lo # [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; Lo # [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Lo # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Lo # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; Lo # KHAROSHTHI LETTER A +10A10..10A13 ; Lo # [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Lo # [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; Lo # [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; Lo # [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; Lo # [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; Lo # [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; Lo # [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; Lo # [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; Lo # [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; Lo # [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; Lo # [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; Lo # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10D00..10D23 ; Lo # [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; Lo # [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4F ; Lo # GARAY SUKUN +10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL 
TAW WITH VERTICAL TAIL +10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; Lo # [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; Lo # [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; Lo # [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11003..11037 ; Lo # [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; Lo # [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; Lo # BRAHMI LETTER OLD TAMIL LLA +11083..110AF ; Lo # [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; Lo # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; Lo # [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11144 ; Lo # CHAKMA LETTER LHAA +11147 ; Lo # CHAKMA LETTER VAA +11150..11172 ; Lo # [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; Lo # MAHAJANI LIGATURE SHRI +11183..111B2 ; Lo # [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; Lo # [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111DA ; Lo # SHARADA EKAM +111DC ; Lo # SHARADA HEADSTROKE +11200..11211 ; Lo # [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; Lo # [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1123F..11240 ; Lo # [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11280..11286 ; Lo # [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; Lo # MULTANI LETTER GHA +1128A..1128D ; Lo # [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; Lo # [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; Lo # [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; Lo # [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +11305..1130C ; Lo # [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; Lo # [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; Lo # [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; Lo # [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; Lo # [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; 
Lo # [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; Lo # GRANTHA SIGN AVAGRAHA +11350 ; Lo # GRANTHA OM +1135D..11361 ; Lo # [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11380..11389 ; Lo # [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Lo # TULU-TIGALARI LETTER EE +1138E ; Lo # TULU-TIGALARI LETTER AI +11390..113B5 ; Lo # [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7 ; Lo # TULU-TIGALARI SIGN AVAGRAHA +113D1 ; Lo # TULU-TIGALARI REPHA +113D3 ; Lo # TULU-TIGALARI SIGN PLUTA +11400..11434 ; Lo # [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; Lo # [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; Lo # [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; Lo # [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114C4..114C5 ; Lo # [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; Lo # TIRHUTA OM +11580..115AE ; Lo # [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115D8..115DB ; Lo # [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; Lo # [48] MODI LETTER A..MODI LETTER LLA +11644 ; Lo # MODI SIGN HUVA +11680..116AA ; Lo # [43] TAKRI LETTER A..TAKRI LETTER RRA +116B8 ; Lo # TAKRI LETTER ARCHAIC KHA +11700..1171A ; Lo # [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11740..11746 ; Lo # [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; Lo # [44] DOGRA LETTER A..DOGRA LETTER RRA +118FF..11906 ; Lo # [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; Lo # DIVES AKURU LETTER O +1190C..11913 ; Lo # [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; Lo # [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; Lo # [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +1193F ; Lo # DIVES AKURU PREFIXED NASAL SIGN +11941 ; Lo # DIVES AKURU INITIAL RA +119A0..119A7 ; Lo # [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; Lo # [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119E1 ; Lo # NANDINAGARI SIGN AVAGRAHA +119E3 ; Lo # NANDINAGARI 
HEADSTROKE +11A00 ; Lo # ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; Lo # [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; Lo # ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; Lo # SOYOMBO LETTER A +11A5C..11A89 ; Lo # [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A9D ; Lo # SOYOMBO MARK PLUTA +11AB0..11AF8 ; Lo # [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Lo # [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11C00..11C08 ; Lo # [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; Lo # [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; Lo # BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; Lo # [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; Lo # [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; Lo # [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; Lo # [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; Lo # MASARAM GONDI REPHA +11D60..11D65 ; Lo # [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; Lo # [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; Lo # [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D98 ; Lo # GUNJALA GONDI OM +11EE0..11EF2 ; Lo # [19] MAKASAR LETTER KA..MAKASAR ANGKA +11F02 ; Lo # KAWI SIGN REPHA +11F04..11F10 ; Lo # [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; Lo # [34] KAWI LETTER KA..KAWI LETTER JNYA +11FB0 ; Lo # LISU LETTER YHA +12000..12399 ; Lo # [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12480..12543 ; Lo # [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; Lo # [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342F ; Lo # [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13441..13446 ; Lo # [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13460..143FA ; Lo # [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA +14400..14646 ; Lo # [583] 
ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..1611D ; Lo # [30] GURUNG KHEMA LETTER A..GURUNG KHEMA LETTER SA +16800..16A38 ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; Lo # [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; Lo # [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; Lo # [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; Lo # [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B63..16B77 ; Lo # [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; Lo # [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D43..16D6A ; Lo # [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU +16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; Lo # MIAO LETTER NASALIZATION +17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; Lo # [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B132 ; Lo # HIRAGANA LETTER SMALL KO +1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; Lo # KATAKANA LETTER SMALL KO +1B164..1B167 ; Lo # [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; Lo # [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; Lo # [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Lo # [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Lo # [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Lo # [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1DF0A ; Lo # LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1E100..1E12C ; Lo # [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E14E ; Lo # NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; Lo # 
[30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Lo # [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E4D0..1E4EA ; Lo # [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E5D0..1E5ED ; Lo # [30] OL ONAL LETTER O..OL ONAL LETTER EG +1E5F0 ; Lo # OL ONAL SIGN HODDOND +1E7E0..1E7E6 ; Lo # [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; Lo # [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; Lo # [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; Lo # [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; Lo # [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1EE00..1EE03 ; Lo # [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Lo # [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Lo # [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Lo # ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Lo # ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Lo # [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Lo # [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Lo # ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Lo # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Lo # ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Lo # ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Lo # ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Lo # ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Lo # [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Lo # [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Lo # ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Lo # ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Lo # ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Lo # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Lo # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Lo # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Lo # [2] ARABIC MATHEMATICAL 
STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Lo # ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Lo # [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Lo # [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Lo # [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Lo # [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Lo # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Lo # [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Lo # [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Lo # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Lo # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Lo # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +20000..2A6DF ; Lo # [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; Lo # [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Lo # [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 136477 + +# ================================================ + +# General_Category=Nonspacing_Mark + +0300..036F ; Mn # [112] COMBINING GRAVE ACCENT..COMBINING LATIN 
SMALL LETTER X +0483..0487 ; Mn # [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0591..05BD ; Mn # [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Mn # HEBREW POINT RAFE +05C1..05C2 ; Mn # [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Mn # [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Mn # HEBREW POINT QAMATS QATAN +0610..061A ; Mn # [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Mn # [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Mn # ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Mn # [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; Mn # [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Mn # [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Mn # [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; Mn # SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Mn # [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Mn # [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Mn # [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Mn # NKO DANTAYALAN +0816..0819 ; Mn # [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Mn # [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Mn # [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Mn # [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Mn # [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0897..089F ; Mn # [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; Mn # [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; Mn # [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Mn # DEVANAGARI VOWEL SIGN OE +093C ; Mn # DEVANAGARI SIGN NUKTA +0941..0948 ; Mn # [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Mn # DEVANAGARI SIGN VIRAMA +0951..0957 ; Mn # [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL 
SIGN UUE +0962..0963 ; Mn # [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Mn # BENGALI SIGN CANDRABINDU +09BC ; Mn # BENGALI SIGN NUKTA +09C1..09C4 ; Mn # [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Mn # BENGALI SIGN VIRAMA +09E2..09E3 ; Mn # [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Mn # BENGALI SANDHI MARK +0A01..0A02 ; Mn # [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Mn # GURMUKHI SIGN NUKTA +0A41..0A42 ; Mn # [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Mn # [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Mn # [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Mn # GURMUKHI SIGN UDAAT +0A70..0A71 ; Mn # [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Mn # GURMUKHI SIGN YAKASH +0A81..0A82 ; Mn # [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Mn # GUJARATI SIGN NUKTA +0AC1..0AC5 ; Mn # [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Mn # [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Mn # GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Mn # [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Mn # [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; Mn # ORIYA SIGN CANDRABINDU +0B3C ; Mn # ORIYA SIGN NUKTA +0B3F ; Mn # ORIYA VOWEL SIGN I +0B41..0B44 ; Mn # [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; Mn # ORIYA SIGN VIRAMA +0B55..0B56 ; Mn # [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; Mn # [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Mn # TAMIL SIGN ANUSVARA +0BC0 ; Mn # TAMIL VOWEL SIGN II +0BCD ; Mn # TAMIL SIGN VIRAMA +0C00 ; Mn # TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Mn # TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Mn # TELUGU SIGN NUKTA +0C3E..0C40 ; Mn # [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; Mn # [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Mn # 
[4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Mn # [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Mn # [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Mn # KANNADA SIGN CANDRABINDU +0CBC ; Mn # KANNADA SIGN NUKTA +0CBF ; Mn # KANNADA VOWEL SIGN I +0CC6 ; Mn # KANNADA VOWEL SIGN E +0CCC..0CCD ; Mn # [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; Mn # [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Mn # [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Mn # [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; Mn # [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Mn # MALAYALAM SIGN VIRAMA +0D62..0D63 ; Mn # [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Mn # SINHALA SIGN CANDRABINDU +0DCA ; Mn # SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; Mn # [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Mn # SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; Mn # THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Mn # [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; Mn # [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Mn # LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Mn # [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; Mn # [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; Mn # [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Mn # TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Mn # TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Mn # TIBETAN MARK TSA -PHRU +0F71..0F7E ; Mn # [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Mn # [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Mn # [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Mn # [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Mn # [36] TIBETAN SUBJOINED LETTER 
NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Mn # TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Mn # [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Mn # [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Mn # [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Mn # [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Mn # [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Mn # [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Mn # [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Mn # MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Mn # [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Mn # MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Mn # MYANMAR VOWEL SIGN AITON AI +135D..135F ; Mn # [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Mn # [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; Mn # [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Mn # [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Mn # [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Mn # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Mn # [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Mn # KHMER SIGN NIKAHIT +17C9..17D3 ; Mn # [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Mn # KHMER SIGN ATTHACAN +180B..180D ; Mn # [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Mn # MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; Mn # [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Mn # MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Mn # [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Mn # [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Mn # LIMBU SMALL LETTER ANUSVARA +1939..193B ; Mn # [3] 
LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Mn # [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Mn # BUGINESE VOWEL SIGN AE +1A56 ; Mn # TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Mn # [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Mn # TAI THAM SIGN SAKOT +1A62 ; Mn # TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Mn # [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABF..1ACE ; Mn # [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Mn # BALINESE SIGN REREKAN +1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; Mn # BALINESE VOWEL SIGN LA LENGA +1B42 ; Mn # BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; Mn # [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Mn # [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Mn # [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Mn # [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; Mn # [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Mn # BATAK SIGN TOMPI +1BE8..1BE9 ; Mn # [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Mn # BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Mn # [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; Mn # [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Mn # [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Mn # [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Mn # [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Mn # [7] VEDIC SIGN VISARGA 
SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Mn # VEDIC SIGN TIRYAK +1CF4 ; Mn # VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Mn # [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; Mn # [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +20D0..20DC ; Mn # [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Mn # COMBINING LEFT RIGHT ARROW ABOVE +20E5..20F0 ; Mn # [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Mn # [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Mn # TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Mn # [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Mn # [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3099..309A ; Mn # [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Mn # COMBINING CYRILLIC VZMET +A674..A67D ; Mn # [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; Mn # [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Mn # [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Mn # SYLOTI NAGRI SIGN DVISVARA +A806 ; Mn # SYLOTI NAGRI SIGN HASANTA +A80B ; Mn # SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Mn # [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Mn # SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Mn # [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Mn # [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Mn # DEVANAGARI VOWEL SIGN AY +A926..A92D ; Mn # [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Mn # [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; Mn # [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Mn # JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Mn # [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU 
MENDUT +A9BC..A9BD ; Mn # [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; Mn # MYANMAR SIGN SHAN SAW +AA29..AA2E ; Mn # [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Mn # [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Mn # [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Mn # CHAM CONSONANT SIGN FINAL NG +AA4C ; Mn # CHAM CONSONANT SIGN FINAL M +AA7C ; Mn # MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Mn # TAI VIET MAI KANG +AAB2..AAB4 ; Mn # [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Mn # [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Mn # [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Mn # TAI VIET TONE MAI THO +AAEC..AAED ; Mn # [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Mn # MEETEI MAYEK VIRAMA +ABE5 ; Mn # MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Mn # MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Mn # MEETEI MAYEK APUN IYEK +FB1E ; Mn # HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; Mn # [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +101FD ; Mn # PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Mn # COPTIC EPACT THOUSANDS MARK +10376..1037A ; Mn # [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Mn # [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Mn # [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Mn # [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Mn # [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Mn # KHAROSHTHI VIRAMA +10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 
+10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Mn # BRAHMI SIGN ANUSVARA +11038..11046 ; Mn # [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Mn # BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Mn # [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Mn # [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Mn # [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Mn # [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Mn # KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Mn # [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Mn # [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Mn # [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Mn # MAHAJANI SIGN NUKTA +11180..11181 ; Mn # [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Mn # [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; Mn # [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Mn # SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; Mn # [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Mn # KHOJKI SIGN ANUSVARA +11236..11237 ; Mn # [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Mn # KHOJKI SIGN SUKUN +11241 ; Mn # KHOJKI VOWEL SIGN VOCALIC R +112DF ; Mn # KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Mn # [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Mn # [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Mn # [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; Mn # GRANTHA VOWEL SIGN II +11366..1136C ; Mn # [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Mn # [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113BB..113C0 ; Mn # [6] TULU-TIGALARI VOWEL SIGN 
U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113CE ; Mn # TULU-TIGALARI SIGN VIRAMA +113D0 ; Mn # TULU-TIGALARI CONJOINER +113D2 ; Mn # TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Mn # [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11438..1143F ; Mn # [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Mn # [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Mn # NEWA SIGN NUKTA +1145E ; Mn # NEWA SANDHI MARK +114B3..114B8 ; Mn # [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Mn # TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; Mn # [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Mn # [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; Mn # [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Mn # [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Mn # [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Mn # [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Mn # [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Mn # MODI SIGN ANUSVARA +1163F..11640 ; Mn # [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Mn # TAKRI SIGN ANUSVARA +116AD ; Mn # TAKRI VOWEL SIGN AA +116B0..116B5 ; Mn # [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Mn # TAKRI SIGN NUKTA +1171D ; Mn # AHOM CONSONANT SIGN MEDIAL LA +1171F ; Mn # AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Mn # [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Mn # [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Mn # [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Mn # [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; Mn # [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; Mn # DIVES AKURU VIRAMA +11943 ; Mn # DIVES AKURU SIGN NUKTA +119D4..119D7 ; Mn # [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Mn # [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Mn # 
NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Mn # [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Mn # [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Mn # [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Mn # ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Mn # [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Mn # [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Mn # [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Mn # [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Mn # [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Mn # [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Mn # BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Mn # [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Mn # [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Mn # [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Mn # [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Mn # [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Mn # MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Mn # [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Mn # [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Mn # MASARAM GONDI RA-KARA +11D90..11D91 ; Mn # [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Mn # GUNJALA GONDI SIGN ANUSVARA +11D97 ; Mn # GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Mn # [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; Mn # [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Mn # [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Mn # KAWI VOWEL SIGN EU +11F42 ; Mn # KAWI CONJOINER +11F5A ; Mn # KAWI SIGN NUKTA +13440 ; Mn # EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Mn # 
[15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Mn # [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Mn # [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; Mn # [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; Mn # KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; Mn # [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; Mn # [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Mn # [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; Mn # [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; Mn # [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Mn # [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Mn # [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Mn # [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Mn # [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Mn # [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Mn # SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Mn # SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Mn # [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Mn # [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Mn # [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Mn # [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Mn # [7] 
COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Mn # [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Mn # [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Mn # COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; Mn # [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Mn # TOTO SIGN RISING TONE +1E2EC..1E2EF ; Mn # [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Mn # [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Mn # [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; Mn # [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2020 + +# ================================================ + +# General_Category=Enclosing_Mark + +0488..0489 ; Me # [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +1ABE ; Me # COMBINING PARENTHESES OVERLAY +20DD..20E0 ; Me # [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E2..20E4 ; Me # [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +A670..A672 ; Me # [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN + +# Total code points: 13 + +# ================================================ + +# General_Category=Spacing_Mark + +0903 ; Mc # DEVANAGARI SIGN VISARGA +093B ; Mc # DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Mc # [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; Mc # [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Mc # [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0982..0983 ; Mc # [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Mc # [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II 
+09C7..09C8 ; Mc # [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Mc # [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Mc # BENGALI AU LENGTH MARK +0A03 ; Mc # GURMUKHI SIGN VISARGA +0A3E..0A40 ; Mc # [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A83 ; Mc # GUJARATI SIGN VISARGA +0ABE..0AC0 ; Mc # [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; Mc # GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Mc # [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0B02..0B03 ; Mc # [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Mc # ORIYA VOWEL SIGN AA +0B40 ; Mc # ORIYA VOWEL SIGN II +0B47..0B48 ; Mc # [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Mc # [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B57 ; Mc # ORIYA AU LENGTH MARK +0BBE..0BBF ; Mc # [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC1..0BC2 ; Mc # [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Mc # [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Mc # [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Mc # TAMIL AU LENGTH MARK +0C01..0C03 ; Mc # [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C41..0C44 ; Mc # [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C82..0C83 ; Mc # [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Mc # KANNADA VOWEL SIGN AA +0CC0..0CC4 ; Mc # [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC7..0CC8 ; Mc # [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Mc # [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CD5..0CD6 ; Mc # [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CF3 ; Mc # KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D02..0D03 ; Mc # [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Mc # [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D46..0D48 ; Mc # [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Mc # [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Mc # MALAYALAM AU LENGTH MARK +0D82..0D83 ; Mc # [2] SINHALA 
SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Mc # [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDF ; Mc # [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Mc # [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0F3E..0F3F ; Mc # [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F7F ; Mc # TIBETAN SIGN RNAM BCAD +102B..102C ; Mc # [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +1031 ; Mc # MYANMAR VOWEL SIGN E +1038 ; Mc # MYANMAR SIGN VISARGA +103B..103C ; Mc # [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +1056..1057 ; Mc # [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1062..1064 ; Mc # [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1067..106D ; Mc # [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1083..1084 ; Mc # [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1087..108C ; Mc # [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108F ; Mc # MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Mc # [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +1715 ; Mc # TAGALOG SIGN PAMUDPOD +1734 ; Mc # HANUNOO SIGN PAMUDPOD +17B6 ; Mc # KHMER VOWEL SIGN AA +17BE..17C5 ; Mc # [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; Mc # [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1923..1926 ; Mc # [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; Mc # [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Mc # [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; Mc # [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A19..1A1A ; Mc # [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A55 ; Mc # TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; Mc # TAI THAM CONSONANT SIGN LA TANG LAI +1A61 ; Mc # TAI THAM VOWEL SIGN A +1A63..1A64 ; Mc # [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA 
+1A6D..1A72 ; Mc # [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1B04 ; Mc # BALINESE SIGN BISAH +1B35 ; Mc # BALINESE VOWEL SIGN TEDUNG +1B3B ; Mc # BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; Mc # [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; Mc # [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B82 ; Mc # SUNDANESE SIGN PANGWISAD +1BA1 ; Mc # SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; Mc # [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; Mc # SUNDANESE SIGN PAMAAEH +1BE7 ; Mc # BATAK VOWEL SIGN E +1BEA..1BEC ; Mc # [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; Mc # BATAK VOWEL SIGN U +1BF2..1BF3 ; Mc # [2] BATAK PANGOLAT..BATAK PANONGONAN +1C24..1C2B ; Mc # [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; Mc # [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CE1 ; Mc # VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CF7 ; Mc # VEDIC SIGN ATIKRAMA +302E..302F ; Mc # [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +A823..A824 ; Mc # [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; Mc # SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Mc # [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Mc # [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A952..A953 ; Mc # [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A983 ; Mc # JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Mc # [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; Mc # [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9C0 ; Mc # [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +AA2F..AA30 ; Mc # [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; Mc # [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA4D ; Mc # CHAM CONSONANT SIGN FINAL H +AA7B ; Mc # MYANMAR SIGN PAO KAREN TONE +AA7D ; Mc # MYANMAR SIGN TAI LAING TONE-5 +AAEB ; Mc # MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Mc # 
[2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Mc # MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Mc # [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; Mc # [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; Mc # [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; Mc # MEETEI MAYEK LUM IYEK +11000 ; Mc # BRAHMI SIGN CANDRABINDU +11002 ; Mc # BRAHMI SIGN VISARGA +11082 ; Mc # KAITHI SIGN VISARGA +110B0..110B2 ; Mc # [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; Mc # [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; Mc # CHAKMA VOWEL SIGN E +11145..11146 ; Mc # [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11182 ; Mc # SHARADA SIGN VISARGA +111B3..111B5 ; Mc # [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; Mc # [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111CE ; Mc # SHARADA VOWEL SIGN PRISHTHAMATRA E +1122C..1122E ; Mc # [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; Mc # [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; Mc # KHOJKI SIGN VIRAMA +112E0..112E2 ; Mc # [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +11302..11303 ; Mc # [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Mc # [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11341..11344 ; Mc # [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Mc # [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; Mc # [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11357 ; Mc # GRANTHA AU LENGTH MARK +11362..11363 ; Mc # [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Mc # [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113C2 ; Mc # TULU-TIGALARI VOWEL SIGN EE +113C5 ; Mc # TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Mc # [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Mc # [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI 
SIGN VISARGA +113CF ; Mc # TULU-TIGALARI SIGN LOOPED VIRAMA +11435..11437 ; Mc # [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; Mc # [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; Mc # NEWA SIGN VISARGA +114B0..114B2 ; Mc # [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B9 ; Mc # TIRHUTA VOWEL SIGN E +114BB..114BE ; Mc # [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114C1 ; Mc # TIRHUTA SIGN VISARGA +115AF..115B1 ; Mc # [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B8..115BB ; Mc # [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; Mc # SIDDHAM SIGN VISARGA +11630..11632 ; Mc # [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; Mc # [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; Mc # MODI SIGN VISARGA +116AC ; Mc # TAKRI SIGN VISARGA +116AE..116AF ; Mc # [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; Mc # TAKRI SIGN VIRAMA +1171E ; Mc # AHOM CONSONANT SIGN MEDIAL RA +11720..11721 ; Mc # [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11726 ; Mc # AHOM VOWEL SIGN E +1182C..1182E ; Mc # [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; Mc # DOGRA SIGN VISARGA +11930..11935 ; Mc # [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Mc # [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193D ; Mc # DIVES AKURU SIGN HALANTA +11940 ; Mc # DIVES AKURU MEDIAL YA +11942 ; Mc # DIVES AKURU MEDIAL RA +119D1..119D3 ; Mc # [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; Mc # [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E4 ; Mc # NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A39 ; Mc # ZANABAZAR SQUARE SIGN VISARGA +11A57..11A58 ; Mc # [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A97 ; Mc # SOYOMBO SIGN VISARGA +11C2F ; Mc # BHAIKSUKI VOWEL SIGN AA +11C3E ; Mc # BHAIKSUKI SIGN VISARGA +11CA9 ; Mc # MARCHEN SUBJOINED LETTER YA +11CB1 ; Mc # MARCHEN VOWEL SIGN I +11CB4 ; Mc # MARCHEN VOWEL SIGN O +11D8A..11D8E ; Mc # [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA 
GONDI VOWEL SIGN UU +11D93..11D94 ; Mc # [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; Mc # GUNJALA GONDI SIGN VISARGA +11EF5..11EF6 ; Mc # [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F03 ; Mc # KAWI SIGN VISARGA +11F34..11F35 ; Mc # [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; Mc # [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F41 ; Mc # KAWI SIGN KILLER +1612A..1612C ; Mc # [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +16F51..16F87 ; Mc # [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 + +# Total code points: 468 + +# ================================================ + +# General_Category=Decimal_Number + +0030..0039 ; Nd # [10] DIGIT ZERO..DIGIT NINE +0660..0669 ; Nd # [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +06F0..06F9 ; Nd # [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +07C0..07C9 ; Nd # [10] NKO DIGIT ZERO..NKO DIGIT NINE +0966..096F ; Nd # [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +09E6..09EF ; Nd # [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +0A66..0A6F ; Nd # [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0AE6..0AEF ; Nd # [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0B66..0B6F ; Nd # [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0BE6..0BEF ; Nd # [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C66..0C6F ; Nd # [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0CE6..0CEF ; Nd # [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0D66..0D6F ; Nd # [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0DE6..0DEF ; Nd # [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0E50..0E59 ; Nd # [10] THAI DIGIT ZERO..THAI DIGIT NINE +0ED0..0ED9 ; Nd # [10] LAO 
DIGIT ZERO..LAO DIGIT NINE +0F20..0F29 ; Nd # [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +1040..1049 ; Nd # [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +1090..1099 ; Nd # [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +17E0..17E9 ; Nd # [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +1810..1819 ; Nd # [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1946..194F ; Nd # [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +19D0..19D9 ; Nd # [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +1A80..1A89 ; Nd # [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Nd # [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1B50..1B59 ; Nd # [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1BB0..1BB9 ; Nd # [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1C40..1C49 ; Nd # [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C50..1C59 ; Nd # [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +A620..A629 ; Nd # [10] VAI DIGIT ZERO..VAI DIGIT NINE +A8D0..A8D9 ; Nd # [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A900..A909 ; Nd # [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A9D0..A9D9 ; Nd # [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9F0..A9F9 ; Nd # [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +AA50..AA59 ; Nd # [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +ABF0..ABF9 ; Nd # [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +104A0..104A9 ; Nd # [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +10D30..10D39 ; Nd # [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Nd # [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +11066..1106F ; Nd # [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Nd # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Nd # [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Nd # [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +112F0..112F9 ; Nd # [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11450..11459 ; Nd # [10] NEWA 
DIGIT ZERO..NEWA DIGIT NINE +114D0..114D9 ; Nd # [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11650..11659 ; Nd # [10] MODI DIGIT ZERO..MODI DIGIT NINE +116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Nd # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11730..11739 ; Nd # [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +118E0..118E9 ; Nd # [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11950..11959 ; Nd # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Nd # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11F50..11F59 ; Nd # [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +16130..16139 ; Nd # [10] GURUNG KHEMA DIGIT ZERO..GURUNG KHEMA DIGIT NINE +16A60..16A69 ; Nd # [10] MRO DIGIT ZERO..MRO DIGIT NINE +16AC0..16AC9 ; Nd # [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16B50..16B59 ; Nd # [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16D70..16D79 ; Nd # [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE +1CCF0..1CCF9 ; Nd # [10] OUTLINED DIGIT ZERO..OUTLINED DIGIT NINE +1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E2F0..1E2F9 ; Nd # [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E4F0..1E4F9 ; Nd # [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E5F1..1E5FA ; Nd # [10] OL ONAL DIGIT ZERO..OL ONAL DIGIT NINE +1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE + +# Total code points: 760 + +# ================================================ + +# General_Category=Letter_Number + +16EE..16F0 ; Nl # [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +2160..2182 ; Nl # [35] ROMAN NUMERAL 
ONE..ROMAN NUMERAL TEN THOUSAND +2185..2188 ; Nl # [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +3007 ; Nl # IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Nl # [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Nl # [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +A6E6..A6EF ; Nl # [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +10140..10174 ; Nl # [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10341 ; Nl # GOTHIC LETTER NINETY +1034A ; Nl # GOTHIC LETTER NINE HUNDRED +103D1..103D5 ; Nl # [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +12400..1246E ; Nl # [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM + +# Total code points: 236 + +# ================================================ + +# General_Category=Other_Number + +00B2..00B3 ; No # [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B9 ; No # SUPERSCRIPT ONE +00BC..00BE ; No # [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +09F4..09F9 ; No # [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +0B72..0B77 ; No # [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0BF0..0BF2 ; No # [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0C78..0C7E ; No # [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0D58..0D5E ; No # [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D70..0D78 ; No # [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0F2A..0F33 ; No # [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +1369..137C ; No # [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +17F0..17F9 ; No # [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +19DA ; No # NEW TAI LUE THAM DIGIT ONE +2070 ; No # SUPERSCRIPT ZERO +2074..2079 ; No # [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +2080..2089 ; No # [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +2150..215F ; No # [16] VULGAR FRACTION 
ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; No # VULGAR FRACTION ZERO THIRDS +2460..249B ; No # [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +24EA..24FF ; No # [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2776..2793 ; No # [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2CFD ; No # COPTIC FRACTION ONE HALF +3192..3195 ; No # [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3220..3229 ; No # [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +3248..324F ; No # [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3251..325F ; No # [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +3280..3289 ; No # [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +32B1..32BF ; No # [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +10107..10133 ; No # [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10175..10178 ; No # [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +1018A..1018B ; No # [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +102E1..102FB ; No # [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +10320..10323 ; No # [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +10858..1085F ; No # [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10879..1087F ; No # [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +108A7..108AF ; No # [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108FB..108FF ; No # [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10916..1091B ; No # [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +109BC..109BD ; No # [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109C0..109CF ; No # [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; No # [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A40..10A48 ; 
No # [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A7D..10A7E ; No # [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A9D..10A9F ; No # [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AEB..10AEF ; No # [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10B58..10B5F ; No # [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B78..10B7F ; No # [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10BA9..10BAF ; No # [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10CFA..10CFF ; No # [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10E60..10E7E ; No # [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10F1D..10F26 ; No # [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F51..10F54 ; No # [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10FC5..10FCB ; No # [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +11052..11065 ; No # [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +111E1..111F4 ; No # [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +1173A..1173B ; No # [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +118EA..118F2 ; No # [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +11C5A..11C6C ; No # [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11FC0..11FD4 ; No # [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +16B5B..16B61 ; No # [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16E80..16E96 ; No # [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +1D2C0..1D2D3 ; No # [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN +1D2E0..1D2F3 ; No # [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D360..1D378 ; No # [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1E8C7..1E8CF ; No # [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1EC71..1ECAB ; No # [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ 
NUMBER PREFIXED NINE +1ECAD..1ECAF ; No # [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB1..1ECB4 ; No # [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; No # [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2F..1ED3D ; No # [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1F100..1F10C ; No # [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO + +# Total code points: 915 + +# ================================================ + +# General_Category=Space_Separator + +0020 ; Zs # SPACE +00A0 ; Zs # NO-BREAK SPACE +1680 ; Zs # OGHAM SPACE MARK +2000..200A ; Zs # [11] EN QUAD..HAIR SPACE +202F ; Zs # NARROW NO-BREAK SPACE +205F ; Zs # MEDIUM MATHEMATICAL SPACE +3000 ; Zs # IDEOGRAPHIC SPACE + +# Total code points: 17 + +# ================================================ + +# General_Category=Line_Separator + +2028 ; Zl # LINE SEPARATOR + +# Total code points: 1 + +# ================================================ + +# General_Category=Paragraph_Separator + +2029 ; Zp # PARAGRAPH SEPARATOR + +# Total code points: 1 + +# ================================================ + +# General_Category=Control + +0000..001F ; Cc # [32] .. +007F..009F ; Cc # [33] .. 
+ +# Total code points: 65 + +# ================================================ + +# General_Category=Format + +00AD ; Cf # SOFT HYPHEN +0600..0605 ; Cf # [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +061C ; Cf # ARABIC LETTER MARK +06DD ; Cf # ARABIC END OF AYAH +070F ; Cf # SYRIAC ABBREVIATION MARK +0890..0891 ; Cf # [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; Cf # ARABIC DISPUTED END OF AYAH +180E ; Cf # MONGOLIAN VOWEL SEPARATOR +200B..200F ; Cf # [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +202A..202E ; Cf # [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Cf # [5] WORD JOINER..INVISIBLE PLUS +2066..206F ; Cf # [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +FEFF ; Cf # ZERO WIDTH NO-BREAK SPACE +FFF9..FFFB ; Cf # [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +110BD ; Cf # KAITHI NUMBER SIGN +110CD ; Cf # KAITHI NUMBER SIGN ABOVE +13430..1343F ; Cf # [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +1BCA0..1BCA3 ; Cf # [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; Cf # [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0001 ; Cf # LANGUAGE TAG +E0020..E007F ; Cf # [96] TAG SPACE..CANCEL TAG + +# Total code points: 170 + +# ================================================ + +# General_Category=Private_Use + +E000..F8FF ; Co # [6400] .. +F0000..FFFFD ; Co # [65534] .. +100000..10FFFD; Co # [65534] .. + +# Total code points: 137468 + +# ================================================ + +# General_Category=Surrogate + +D800..DFFF ; Cs # [2048] .. 
+ +# Total code points: 2048 + +# ================================================ + +# General_Category=Dash_Punctuation + +002D ; Pd # HYPHEN-MINUS +058A ; Pd # ARMENIAN HYPHEN +05BE ; Pd # HEBREW PUNCTUATION MAQAF +1400 ; Pd # CANADIAN SYLLABICS HYPHEN +1806 ; Pd # MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Pd # [6] HYPHEN..HORIZONTAL BAR +2E17 ; Pd # DOUBLE OBLIQUE HYPHEN +2E1A ; Pd # HYPHEN WITH DIAERESIS +2E3A..2E3B ; Pd # [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Pd # DOUBLE HYPHEN +2E5D ; Pd # OBLIQUE HYPHEN +301C ; Pd # WAVE DASH +3030 ; Pd # WAVY DASH +30A0 ; Pd # KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Pd # [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Pd # SMALL EM DASH +FE63 ; Pd # SMALL HYPHEN-MINUS +FF0D ; Pd # FULLWIDTH HYPHEN-MINUS +10D6E ; Pd # GARAY HYPHEN +10EAD ; Pd # YEZIDI HYPHENATION MARK + +# Total code points: 27 + +# ================================================ + +# General_Category=Open_Punctuation + +0028 ; Ps # LEFT PARENTHESIS +005B ; Ps # LEFT SQUARE BRACKET +007B ; Ps # LEFT CURLY BRACKET +0F3A ; Ps # TIBETAN MARK GUG RTAGS GYON +0F3C ; Ps # TIBETAN MARK ANG KHANG GYON +169B ; Ps # OGHAM FEATHER MARK +201A ; Ps # SINGLE LOW-9 QUOTATION MARK +201E ; Ps # DOUBLE LOW-9 QUOTATION MARK +2045 ; Ps # LEFT SQUARE BRACKET WITH QUILL +207D ; Ps # SUPERSCRIPT LEFT PARENTHESIS +208D ; Ps # SUBSCRIPT LEFT PARENTHESIS +2308 ; Ps # LEFT CEILING +230A ; Ps # LEFT FLOOR +2329 ; Ps # LEFT-POINTING ANGLE BRACKET +2768 ; Ps # MEDIUM LEFT PARENTHESIS ORNAMENT +276A ; Ps # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276C ; Ps # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276E ; Ps # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Ps # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Ps # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Ps # MEDIUM LEFT CURLY BRACKET ORNAMENT +27C5 ; Ps # LEFT S-SHAPED BAG DELIMITER +27E6 ; Ps # MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E8 ; Ps # 
MATHEMATICAL LEFT ANGLE BRACKET +27EA ; Ps # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EC ; Ps # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27EE ; Ps # MATHEMATICAL LEFT FLATTENED PARENTHESIS +2983 ; Ps # LEFT WHITE CURLY BRACKET +2985 ; Ps # LEFT WHITE PARENTHESIS +2987 ; Ps # Z NOTATION LEFT IMAGE BRACKET +2989 ; Ps # Z NOTATION LEFT BINDING BRACKET +298B ; Ps # LEFT SQUARE BRACKET WITH UNDERBAR +298D ; Ps # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298F ; Ps # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2991 ; Ps # LEFT ANGLE BRACKET WITH DOT +2993 ; Ps # LEFT ARC LESS-THAN BRACKET +2995 ; Ps # DOUBLE LEFT ARC GREATER-THAN BRACKET +2997 ; Ps # LEFT BLACK TORTOISE SHELL BRACKET +29D8 ; Ps # LEFT WIGGLY FENCE +29DA ; Ps # LEFT DOUBLE WIGGLY FENCE +29FC ; Ps # LEFT-POINTING CURVED ANGLE BRACKET +2E22 ; Ps # TOP LEFT HALF BRACKET +2E24 ; Ps # BOTTOM LEFT HALF BRACKET +2E26 ; Ps # LEFT SIDEWAYS U BRACKET +2E28 ; Ps # LEFT DOUBLE PARENTHESIS +2E42 ; Ps # DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E55 ; Ps # LEFT SQUARE BRACKET WITH STROKE +2E57 ; Ps # LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Ps # TOP HALF LEFT PARENTHESIS +2E5B ; Ps # BOTTOM HALF LEFT PARENTHESIS +3008 ; Ps # LEFT ANGLE BRACKET +300A ; Ps # LEFT DOUBLE ANGLE BRACKET +300C ; Ps # LEFT CORNER BRACKET +300E ; Ps # LEFT WHITE CORNER BRACKET +3010 ; Ps # LEFT BLACK LENTICULAR BRACKET +3014 ; Ps # LEFT TORTOISE SHELL BRACKET +3016 ; Ps # LEFT WHITE LENTICULAR BRACKET +3018 ; Ps # LEFT WHITE TORTOISE SHELL BRACKET +301A ; Ps # LEFT WHITE SQUARE BRACKET +301D ; Ps # REVERSED DOUBLE PRIME QUOTATION MARK +FD3F ; Ps # ORNATE RIGHT PARENTHESIS +FE17 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE35 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE37 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE39 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3B ; Ps # PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3D ; Ps # 
PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3F ; Ps # PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE41 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE43 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE47 ; Ps # PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE59 ; Ps # SMALL LEFT PARENTHESIS +FE5B ; Ps # SMALL LEFT CURLY BRACKET +FE5D ; Ps # SMALL LEFT TORTOISE SHELL BRACKET +FF08 ; Ps # FULLWIDTH LEFT PARENTHESIS +FF3B ; Ps # FULLWIDTH LEFT SQUARE BRACKET +FF5B ; Ps # FULLWIDTH LEFT CURLY BRACKET +FF5F ; Ps # FULLWIDTH LEFT WHITE PARENTHESIS +FF62 ; Ps # HALFWIDTH LEFT CORNER BRACKET + +# Total code points: 79 + +# ================================================ + +# General_Category=Close_Punctuation + +0029 ; Pe # RIGHT PARENTHESIS +005D ; Pe # RIGHT SQUARE BRACKET +007D ; Pe # RIGHT CURLY BRACKET +0F3B ; Pe # TIBETAN MARK GUG RTAGS GYAS +0F3D ; Pe # TIBETAN MARK ANG KHANG GYAS +169C ; Pe # OGHAM REVERSED FEATHER MARK +2046 ; Pe # RIGHT SQUARE BRACKET WITH QUILL +207E ; Pe # SUPERSCRIPT RIGHT PARENTHESIS +208E ; Pe # SUBSCRIPT RIGHT PARENTHESIS +2309 ; Pe # RIGHT CEILING +230B ; Pe # RIGHT FLOOR +232A ; Pe # RIGHT-POINTING ANGLE BRACKET +2769 ; Pe # MEDIUM RIGHT PARENTHESIS ORNAMENT +276B ; Pe # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276D ; Pe # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276F ; Pe # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2771 ; Pe # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2773 ; Pe # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2775 ; Pe # MEDIUM RIGHT CURLY BRACKET ORNAMENT +27C6 ; Pe # RIGHT S-SHAPED BAG DELIMITER +27E7 ; Pe # MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E9 ; Pe # MATHEMATICAL RIGHT ANGLE BRACKET +27EB ; Pe # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27ED ; Pe # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EF ; Pe # MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2984 ; Pe # RIGHT WHITE CURLY BRACKET +2986 ; Pe # RIGHT WHITE PARENTHESIS +2988 
; Pe # Z NOTATION RIGHT IMAGE BRACKET +298A ; Pe # Z NOTATION RIGHT BINDING BRACKET +298C ; Pe # RIGHT SQUARE BRACKET WITH UNDERBAR +298E ; Pe # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pe # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2992 ; Pe # RIGHT ANGLE BRACKET WITH DOT +2994 ; Pe # RIGHT ARC GREATER-THAN BRACKET +2996 ; Pe # DOUBLE RIGHT ARC LESS-THAN BRACKET +2998 ; Pe # RIGHT BLACK TORTOISE SHELL BRACKET +29D9 ; Pe # RIGHT WIGGLY FENCE +29DB ; Pe # RIGHT DOUBLE WIGGLY FENCE +29FD ; Pe # RIGHT-POINTING CURVED ANGLE BRACKET +2E23 ; Pe # TOP RIGHT HALF BRACKET +2E25 ; Pe # BOTTOM RIGHT HALF BRACKET +2E27 ; Pe # RIGHT SIDEWAYS U BRACKET +2E29 ; Pe # RIGHT DOUBLE PARENTHESIS +2E56 ; Pe # RIGHT SQUARE BRACKET WITH STROKE +2E58 ; Pe # RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E5A ; Pe # TOP HALF RIGHT PARENTHESIS +2E5C ; Pe # BOTTOM HALF RIGHT PARENTHESIS +3009 ; Pe # RIGHT ANGLE BRACKET +300B ; Pe # RIGHT DOUBLE ANGLE BRACKET +300D ; Pe # RIGHT CORNER BRACKET +300F ; Pe # RIGHT WHITE CORNER BRACKET +3011 ; Pe # RIGHT BLACK LENTICULAR BRACKET +3015 ; Pe # RIGHT TORTOISE SHELL BRACKET +3017 ; Pe # RIGHT WHITE LENTICULAR BRACKET +3019 ; Pe # RIGHT WHITE TORTOISE SHELL BRACKET +301B ; Pe # RIGHT WHITE SQUARE BRACKET +301E..301F ; Pe # [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FD3E ; Pe # ORNATE LEFT PARENTHESIS +FE18 ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE36 ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE38 ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE3A ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3C ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3E ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE40 ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE42 ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE44 ; Pe # PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE48 ; 
Pe # PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE5A ; Pe # SMALL RIGHT PARENTHESIS +FE5C ; Pe # SMALL RIGHT CURLY BRACKET +FE5E ; Pe # SMALL RIGHT TORTOISE SHELL BRACKET +FF09 ; Pe # FULLWIDTH RIGHT PARENTHESIS +FF3D ; Pe # FULLWIDTH RIGHT SQUARE BRACKET +FF5D ; Pe # FULLWIDTH RIGHT CURLY BRACKET +FF60 ; Pe # FULLWIDTH RIGHT WHITE PARENTHESIS +FF63 ; Pe # HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 77 + +# ================================================ + +# General_Category=Connector_Punctuation + +005F ; Pc # LOW LINE +203F..2040 ; Pc # [2] UNDERTIE..CHARACTER TIE +2054 ; Pc # INVERTED UNDERTIE +FE33..FE34 ; Pc # [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; Pc # [3] DASHED LOW LINE..WAVY LOW LINE +FF3F ; Pc # FULLWIDTH LOW LINE + +# Total code points: 10 + +# ================================================ + +# General_Category=Other_Punctuation + +0021..0023 ; Po # [3] EXCLAMATION MARK..NUMBER SIGN +0025..0027 ; Po # [3] PERCENT SIGN..APOSTROPHE +002A ; Po # ASTERISK +002C ; Po # COMMA +002E..002F ; Po # [2] FULL STOP..SOLIDUS +003A..003B ; Po # [2] COLON..SEMICOLON +003F..0040 ; Po # [2] QUESTION MARK..COMMERCIAL AT +005C ; Po # REVERSE SOLIDUS +00A1 ; Po # INVERTED EXCLAMATION MARK +00A7 ; Po # SECTION SIGN +00B6..00B7 ; Po # [2] PILCROW SIGN..MIDDLE DOT +00BF ; Po # INVERTED QUESTION MARK +037E ; Po # GREEK QUESTION MARK +0387 ; Po # GREEK ANO TELEIA +055A..055F ; Po # [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0589 ; Po # ARMENIAN FULL STOP +05C0 ; Po # HEBREW PUNCTUATION PASEQ +05C3 ; Po # HEBREW PUNCTUATION SOF PASUQ +05C6 ; Po # HEBREW PUNCTUATION NUN HAFUKHA +05F3..05F4 ; Po # [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +0609..060A ; Po # [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060C..060D ; Po # [2] ARABIC COMMA..ARABIC DATE SEPARATOR +061B ; Po # ARABIC SEMICOLON +061D..061F ; Po # [3] ARABIC END OF TEXT MARK..ARABIC 
QUESTION MARK +066A..066D ; Po # [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +06D4 ; Po # ARABIC FULL STOP +0700..070D ; Po # [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +07F7..07F9 ; Po # [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +0830..083E ; Po # [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +085E ; Po # MANDAIC PUNCTUATION +0964..0965 ; Po # [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0970 ; Po # DEVANAGARI ABBREVIATION SIGN +09FD ; Po # BENGALI ABBREVIATION SIGN +0A76 ; Po # GURMUKHI ABBREVIATION SIGN +0AF0 ; Po # GUJARATI ABBREVIATION SIGN +0C77 ; Po # TELUGU SIGN SIDDHAM +0C84 ; Po # KANNADA SIGN SIDDHAM +0DF4 ; Po # SINHALA PUNCTUATION KUNDDALIYA +0E4F ; Po # THAI CHARACTER FONGMAN +0E5A..0E5B ; Po # [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F04..0F12 ; Po # [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F14 ; Po # TIBETAN MARK GTER TSHEG +0F85 ; Po # TIBETAN MARK PALUTA +0FD0..0FD4 ; Po # [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD9..0FDA ; Po # [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +104A..104F ; Po # [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +10FB ; Po # GEORGIAN PARAGRAPH SEPARATOR +1360..1368 ; Po # [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Po # CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Po # [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Po # [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Po # [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D8..17DA ; Po # [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +1800..1805 ; Po # [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1807..180A ; Po # [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +1944..1945 ; Po # [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1A1E..1A1F ; Po # [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1AA0..1AA6 ; Po # [7] TAI 
THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA8..1AAD ; Po # [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B4E..1B4F ; Po # [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN +1B5A..1B60 ; Po # [7] BALINESE PANTI..BALINESE PAMENENG +1B7D..1B7F ; Po # [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK +1BFC..1BFF ; Po # [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C3B..1C3F ; Po # [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Po # [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; Po # [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD3 ; Po # VEDIC SIGN NIHSHVASA +2016..2017 ; Po # [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2020..2027 ; Po # [8] DAGGER..HYPHENATION POINT +2030..2038 ; Po # [9] PER MILLE SIGN..CARET +203B..203E ; Po # [4] REFERENCE MARK..OVERLINE +2041..2043 ; Po # [3] CARET INSERTION POINT..HYPHEN BULLET +2047..2051 ; Po # [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2053 ; Po # SWUNG DASH +2055..205E ; Po # [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2CF9..2CFC ; Po # [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFE..2CFF ; Po # [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2D70 ; Po # TIFINAGH SEPARATOR MARK +2E00..2E01 ; Po # [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E06..2E08 ; Po # [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E0B ; Po # RAISED SQUARE +2E0E..2E16 ; Po # [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E18..2E19 ; Po # [2] INVERTED INTERROBANG..PALM BRANCH +2E1B ; Po # TILDE WITH RING ABOVE +2E1E..2E1F ; Po # [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E2A..2E2E ; Po # [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E30..2E39 ; Po # [10] RING POINT..TOP HALF SECTION SIGN +2E3C..2E3F ; Po # [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E41 ; Po # REVERSED COMMA +2E43..2E4F ; Po # 
[13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E52..2E54 ; Po # [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +3001..3003 ; Po # [3] IDEOGRAPHIC COMMA..DITTO MARK +303D ; Po # PART ALTERNATION MARK +30FB ; Po # KATAKANA MIDDLE DOT +A4FE..A4FF ; Po # [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Po # [3] VAI COMMA..VAI QUESTION MARK +A673 ; Po # SLAVONIC ASTERISK +A67E ; Po # CYRILLIC KAVYKA +A6F2..A6F7 ; Po # [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A874..A877 ; Po # [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Po # [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8F8..A8FA ; Po # [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FC ; Po # DEVANAGARI SIGN SIDDHAM +A92E..A92F ; Po # [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A95F ; Po # REJANG SECTION MARK +A9C1..A9CD ; Po # [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9DE..A9DF ; Po # [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +AA5C..AA5F ; Po # [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AADE..AADF ; Po # [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Po # [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Po # MEETEI MAYEK CHEIKHEI +FE10..FE16 ; Po # [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE19 ; Po # PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; Po # PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE45..FE46 ; Po # [2] SESAME DOT..WHITE SESAME DOT +FE49..FE4C ; Po # [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE50..FE52 ; Po # [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Po # [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE5F..FE61 ; Po # [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE68 ; Po # SMALL REVERSE SOLIDUS +FE6A..FE6B ; Po # [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FF01..FF03 ; Po # [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF05..FF07 ; Po # [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF0A ; Po 
# FULLWIDTH ASTERISK +FF0C ; Po # FULLWIDTH COMMA +FF0E..FF0F ; Po # [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF1A..FF1B ; Po # [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F..FF20 ; Po # [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3C ; Po # FULLWIDTH REVERSE SOLIDUS +FF61 ; Po # HALFWIDTH IDEOGRAPHIC FULL STOP +FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +10100..10102 ; Po # [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +1039F ; Po # UGARITIC WORD DIVIDER +103D0 ; Po # OLD PERSIAN WORD DIVIDER +1056F ; Po # CAUCASIAN ALBANIAN CITATION MARK +10857 ; Po # IMPERIAL ARAMAIC SECTION SIGN +1091F ; Po # PHOENICIAN WORD SEPARATOR +1093F ; Po # LYDIAN TRIANGULAR MARK +10A50..10A58 ; Po # [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A7F ; Po # OLD SOUTH ARABIAN NUMERIC INDICATOR +10AF0..10AF6 ; Po # [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B39..10B3F ; Po # [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Po # [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10F55..10F59 ; Po # [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Po # [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +11047..1104D ; Po # [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BB..110BC ; Po # [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BE..110C1 ; Po # [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11140..11143 ; Po # [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11174..11175 ; Po # [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +111C5..111C8 ; Po # [4] SHARADA DANDA..SHARADA SEPARATOR +111CD ; Po # SHARADA SUTRA MARK +111DB ; Po # SHARADA SIGN SIDDHAM +111DD..111DF ; Po # [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +11238..1123D ; Po # [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +112A9 ; Po # MULTANI SECTION MARK +113D4..113D5 ; Po # [2] 
TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Po # [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA +1144B..1144F ; Po # [5] NEWA DANDA..NEWA ABBREVIATION SIGN +1145A..1145B ; Po # [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; Po # NEWA INSERTION SIGN +114C6 ; Po # TIRHUTA ABBREVIATION SIGN +115C1..115D7 ; Po # [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11643 ; Po # [3] MODI DANDA..MODI ABBREVIATION SIGN +11660..1166C ; Po # [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +116B9 ; Po # TAKRI ABBREVIATION SIGN +1173C..1173E ; Po # [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1183B ; Po # DOGRA ABBREVIATION SIGN +11944..11946 ; Po # [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +119E2 ; Po # NANDINAGARI SIGN SIDDHAM +11A3F..11A46 ; Po # [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9E..11AA2 ; Po # [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11B00..11B09 ; Po # [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BE1 ; Po # SUNUWAR SIGN PVO +11C41..11C45 ; Po # [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C70..11C71 ; Po # [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11EF7..11EF8 ; Po # [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F4F ; Po # [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11FFF ; Po # TAMIL PUNCTUATION END OF TEXT +12470..12474 ; Po # [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12FF1..12FF2 ; Po # [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +16A6E..16A6F ; Po # [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Po # BASSA VAH FULL STOP +16B37..16B3B ; Po # [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B44 ; Po # PAHAWH HMONG SIGN XAUS +16D6D..16D6F ; Po # [3] KIRAT RAI 
SIGN YUPI..KIRAT RAI DOUBLE DANDA +16E97..16E9A ; Po # [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16FE2 ; Po # OLD CHINESE HOOK MARK +1BC9F ; Po # DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1E5FF ; Po # OL ONAL ABBREVIATION SIGN +1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK + +# Total code points: 640 + +# ================================================ + +# General_Category=Math_Symbol + +002B ; Sm # PLUS SIGN +003C..003E ; Sm # [3] LESS-THAN SIGN..GREATER-THAN SIGN +007C ; Sm # VERTICAL LINE +007E ; Sm # TILDE +00AC ; Sm # NOT SIGN +00B1 ; Sm # PLUS-MINUS SIGN +00D7 ; Sm # MULTIPLICATION SIGN +00F7 ; Sm # DIVISION SIGN +03F6 ; Sm # GREEK REVERSED LUNATE EPSILON SYMBOL +0606..0608 ; Sm # [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +2044 ; Sm # FRACTION SLASH +2052 ; Sm # COMMERCIAL MINUS SIGN +207A..207C ; Sm # [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +208A..208C ; Sm # [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +2118 ; Sm # SCRIPT CAPITAL P +2140..2144 ; Sm # [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +214B ; Sm # TURNED AMPERSAND +2190..2194 ; Sm # [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +219A..219B ; Sm # [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +21A0 ; Sm # RIGHTWARDS TWO HEADED ARROW +21A3 ; Sm # RIGHTWARDS ARROW WITH TAIL +21A6 ; Sm # RIGHTWARDS ARROW FROM BAR +21AE ; Sm # LEFT RIGHT ARROW WITH STROKE +21CE..21CF ; Sm # [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D2 ; Sm # RIGHTWARDS DOUBLE ARROW +21D4 ; Sm # LEFT RIGHT DOUBLE ARROW +21F4..22FF ; Sm # [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2320..2321 ; Sm # [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +237C ; Sm # RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +239B..23B3 ; Sm # [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23DC..23E1 ; Sm # [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET 
+25B7 ; Sm # WHITE RIGHT-POINTING TRIANGLE +25C1 ; Sm # WHITE LEFT-POINTING TRIANGLE +25F8..25FF ; Sm # [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +266F ; Sm # MUSIC SHARP SIGN +27C0..27C4 ; Sm # [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C7..27E5 ; Sm # [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27F0..27FF ; Sm # [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; Sm # [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2999..29D7 ; Sm # [63] DOTTED FENCE..BLACK HOURGLASS +29DC..29FB ; Sm # [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FE..2AFF ; Sm # [258] TINY..N-ARY WHITE VERTICAL BAR +2B30..2B44 ; Sm # [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B47..2B4C ; Sm # [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +FB29 ; Sm # HEBREW LETTER ALTERNATIVE PLUS SIGN +FE62 ; Sm # SMALL PLUS SIGN +FE64..FE66 ; Sm # [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FF0B ; Sm # FULLWIDTH PLUS SIGN +FF1C..FF1E ; Sm # [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF5C ; Sm # FULLWIDTH VERTICAL LINE +FF5E ; Sm # FULLWIDTH TILDE +FFE2 ; Sm # FULLWIDTH NOT SIGN +FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +10D8E..10D8F ; Sm # [2] GARAY PLUS SIGN..GARAY MINUS SIGN +1D6C1 ; Sm # MATHEMATICAL BOLD NABLA +1D6DB ; Sm # MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; Sm # MATHEMATICAL ITALIC NABLA +1D715 ; Sm # MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; Sm # MATHEMATICAL BOLD ITALIC NABLA +1D74F ; Sm # MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; Sm # MATHEMATICAL SANS-SERIF BOLD NABLA +1D789 ; Sm # MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7C3 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; Sm # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH 
WITH DAL + +# Total code points: 950 + +# ================================================ + +# General_Category=Currency_Symbol + +0024 ; Sc # DOLLAR SIGN +00A2..00A5 ; Sc # [4] CENT SIGN..YEN SIGN +058F ; Sc # ARMENIAN DRAM SIGN +060B ; Sc # AFGHANI SIGN +07FE..07FF ; Sc # [2] NKO DOROME SIGN..NKO TAMAN SIGN +09F2..09F3 ; Sc # [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09FB ; Sc # BENGALI GANDA MARK +0AF1 ; Sc # GUJARATI RUPEE SIGN +0BF9 ; Sc # TAMIL RUPEE SIGN +0E3F ; Sc # THAI CURRENCY SYMBOL BAHT +17DB ; Sc # KHMER CURRENCY SYMBOL RIEL +20A0..20C0 ; Sc # [33] EURO-CURRENCY SIGN..SOM SIGN +A838 ; Sc # NORTH INDIC RUPEE MARK +FDFC ; Sc # RIAL SIGN +FE69 ; Sc # SMALL DOLLAR SIGN +FF04 ; Sc # FULLWIDTH DOLLAR SIGN +FFE0..FFE1 ; Sc # [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +11FDD..11FE0 ; Sc # [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +1E2FF ; Sc # WANCHO NGUN SIGN +1ECB0 ; Sc # INDIC SIYAQ RUPEE MARK + +# Total code points: 63 + +# ================================================ + +# General_Category=Modifier_Symbol + +005E ; Sk # CIRCUMFLEX ACCENT +0060 ; Sk # GRAVE ACCENT +00A8 ; Sk # DIAERESIS +00AF ; Sk # MACRON +00B4 ; Sk # ACUTE ACCENT +00B8 ; Sk # CEDILLA +02C2..02C5 ; Sk # [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02D2..02DF ; Sk # [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02EB ; Sk # [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02ED ; Sk # MODIFIER LETTER UNASPIRATED +02EF..02FF ; Sk # [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0375 ; Sk # GREEK LOWER NUMERAL SIGN +0384..0385 ; Sk # [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0888 ; Sk # ARABIC RAISED ROUND DOT +1FBD ; Sk # GREEK KORONIS +1FBF..1FC1 ; Sk # [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Sk # [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Sk # [3] GREEK 
DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Sk # [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Sk # [2] GREEK OXIA..GREEK DASIA +309B..309C ; Sk # [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A700..A716 ; Sk # [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A720..A721 ; Sk # [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A789..A78A ; Sk # [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +AB5B ; Sk # MODIFIER BREVE WITH INVERTED BREVE +AB6A..AB6B ; Sk # [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FBB2..FBC2 ; Sk # [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FF3E ; Sk # FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Sk # FULLWIDTH GRAVE ACCENT +FFE3 ; Sk # FULLWIDTH MACRON +1F3FB..1F3FF ; Sk # [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 + +# Total code points: 125 + +# ================================================ + +# General_Category=Other_Symbol + +00A6 ; So # BROKEN BAR +00A9 ; So # COPYRIGHT SIGN +00AE ; So # REGISTERED SIGN +00B0 ; So # DEGREE SIGN +0482 ; So # CYRILLIC THOUSANDS SIGN +058D..058E ; So # [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +060E..060F ; So # [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +06DE ; So # ARABIC START OF RUB EL HIZB +06E9 ; So # ARABIC PLACE OF SAJDAH +06FD..06FE ; So # [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +07F6 ; So # NKO SYMBOL OO DENNEN +09FA ; So # BENGALI ISSHAR +0B70 ; So # ORIYA ISSHAR +0BF3..0BF8 ; So # [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BFA ; So # TAMIL NUMBER SIGN +0C7F ; So # TELUGU SIGN TUUMU +0D4F ; So # MALAYALAM SIGN PARA +0D79 ; So # MALAYALAM DATE MARK +0F01..0F03 ; So # [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F13 ; So # TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F15..0F17 ; So # [3] 
TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F1A..0F1F ; So # [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F34 ; So # TIBETAN MARK BSDUS RTAGS +0F36 ; So # TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F38 ; So # TIBETAN MARK CHE MGO +0FBE..0FC5 ; So # [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC7..0FCC ; So # [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; So # [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD5..0FD8 ; So # [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +109E..109F ; So # [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +1390..1399 ; So # [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +166D ; So # CANADIAN SYLLABICS CHI SIGN +1940 ; So # LIMBU SIGN LOO +19DE..19FF ; So # [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC +1B61..1B6A ; So # [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B74..1B7C ; So # [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +2100..2101 ; So # [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2103..2106 ; So # [4] DEGREE CELSIUS..CADA UNA +2108..2109 ; So # [2] SCRUPLE..DEGREE FAHRENHEIT +2114 ; So # L B BAR SYMBOL +2116..2117 ; So # [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +211E..2123 ; So # [6] PRESCRIPTION TAKE..VERSICLE +2125 ; So # OUNCE SIGN +2127 ; So # INVERTED OHM SIGN +2129 ; So # TURNED GREEK SMALL LETTER IOTA +212E ; So # ESTIMATED SYMBOL +213A..213B ; So # [2] ROTATED CAPITAL Q..FACSIMILE SIGN +214A ; So # PROPERTY LINE +214C..214D ; So # [2] PER SIGN..AKTIESELSKAB +214F ; So # SYMBOL FOR SAMARITAN SOURCE +218A..218B ; So # [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2195..2199 ; So # [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; So # [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; So # [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; So # [2] 
LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7..21AD ; So # [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AF..21CD ; So # [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; So # [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; So # DOWNWARDS DOUBLE ARROW +21D5..21F3 ; So # [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +2300..2307 ; So # [8] DIAMETER SIGN..WAVY LINE +230C..231F ; So # [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2322..2328 ; So # [7] FROWN..KEYBOARD +232B..237B ; So # [81] ERASE TO THE LEFT..NOT CHECK MARK +237D..239A ; So # [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +23B4..23DB ; So # [40] TOP SQUARE BRACKET..FUSE +23E2..2429 ; So # [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM +2440..244A ; So # [11] OCR HOOK..OCR DOUBLE BACKSLASH +249C..24E9 ; So # [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2500..25B6 ; So # [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B8..25C0 ; So # [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C2..25F7 ; So # [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +2600..266E ; So # [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +2670..2767 ; So # [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2794..27BF ; So # [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +2800..28FF ; So # [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2B00..2B2F ; So # [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B45..2B46 ; So # [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B4D..2B73 ; So # [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; So # [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; So # [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2CE5..2CEA ; So # [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2E50..2E51 ; So # [2] 
CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; So # [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; So # [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFF ; So # [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION +3004 ; So # JAPANESE INDUSTRIAL STANDARD SYMBOL +3012..3013 ; So # [2] POSTAL MARK..GETA MARK +3020 ; So # POSTAL MARK FACE +3036..3037 ; So # [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303E..303F ; So # [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +3190..3191 ; So # [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3196..319F ; So # [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31C0..31E5 ; So # [38] CJK STROKE T..CJK STROKE SZP +31EF ; So # IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION +3200..321E ; So # [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +322A..3247 ; So # [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3250 ; So # PARTNERSHIP SIGN +3260..327F ; So # [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL +328A..32B0 ; So # [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32C0..33FF ; So # [320] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL +4DC0..4DFF ; So # [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A490..A4C6 ; So # [55] YI RADICAL QOT..YI RADICAL KE +A828..A82B ; So # [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A836..A837 ; So # [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A839 ; So # NORTH INDIC QUANTITY MARK +AA77..AA79 ; So # [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +FD40..FD4F ; So # [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FDCF ; So # ARABIC LIGATURE SALAAMUHU ALAYNAA +FDFD..FDFF ; So # [3] ARABIC LIGATURE 
BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FFE4 ; So # FULLWIDTH BROKEN BAR +FFE8 ; So # HALFWIDTH FORMS LIGHT VERTICAL +FFED..FFEE ; So # [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10137..1013F ; So # [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10179..10189 ; So # [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018C..1018E ; So # [3] GREEK SINUSOID SIGN..NOMISMA SIGN +10190..1019C ; So # [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; So # GREEK SYMBOL TAU RHO +101D0..101FC ; So # [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +10877..10878 ; So # [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10AC8 ; So # MANICHAEAN SIGN UD +1173F ; So # AHOM SYMBOL VI +11FD5..11FDC ; So # [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FE1..11FF1 ; So # [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +16B3C..16B3F ; So # [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B45 ; So # PAHAWH HMONG SIGN CIM TSOV ROG +1BC9C ; So # DUPLOYAN SIGN O WITH CROSS +1CC00..1CCEF ; So # [240] UP-POINTING GO-KART..OUTLINED LATIN CAPITAL LETTER Z +1CD00..1CEB3 ; So # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; So # [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D16A..1D16C ; So # [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D183..1D184 ; So # [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; So # [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1EA ; So # [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D200..1D241 ; So # [66] 
GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D245 ; So # GREEK MUSICAL LEIMMA +1D300..1D356 ; So # [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D800..1D9FF ; So # [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA37..1DA3A ; So # [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA6D..1DA74 ; So # [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA76..1DA83 ; So # [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA85..1DA86 ; So # [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1E14F ; So # NYIAKENG PUACHUE HMONG CIRCLED CA +1ECAC ; So # INDIC SIYAQ PLACEHOLDER +1ED2E ; So # OTTOMAN SIYAQ MARRATAN +1F000..1F02B ; So # [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; So # [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; So # [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; So # [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; So # [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; So # [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F10D..1F1AD ; So # [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL +1F1E6..1F202 ; So # [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA +1F210..1F23B ; So # [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; So # [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; So # [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265 ; So # [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; So # [251] CYCLONE..AMPHORA +1F400..1F6D7 ; So # [728] RAT..ELEVATOR +1F6DC..1F6EC ; So # [17] WIRELESS..AIRPLANE ARRIVING +1F6F0..1F6FC ; So # [13] SATELLITE..ROLLER SKATE +1F700..1F776 ; So # [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F7D9 ; So # 
[95] HAUMEA..NINE POINTED WHITE STAR +1F7E0..1F7EB ; So # [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; So # HEAVY EQUALS SIGN +1F800..1F80B ; So # [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; So # [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; So # [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8BB ; So # [12] ARROW POINTING UPWARDS THEN NORTH WEST..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; So # [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW +1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH +1FA80..1FA89 ; So # [10] YO-YO..HARP +1FA8F..1FAC6 ; So # [56] SHOVEL..FINGERPRINT +1FACE..1FADC ; So # [15] MOOSE..ROOT VEGETABLE +1FADF..1FAE9 ; So # [11] SPLATTER..FACE WITH BAGS UNDER EYES +1FAF0..1FAF8 ; So # [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND +1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE + +# Total code points: 7376 + +# ================================================ + +# General_Category=Initial_Punctuation + +00AB ; Pi # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Pi # LEFT SINGLE QUOTATION MARK +201B..201C ; Pi # [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201F ; Pi # DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Pi # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +2E02 ; Pi # LEFT SUBSTITUTION 
BRACKET +2E04 ; Pi # LEFT DOTTED SUBSTITUTION BRACKET +2E09 ; Pi # LEFT TRANSPOSITION BRACKET +2E0C ; Pi # LEFT RAISED OMISSION BRACKET +2E1C ; Pi # LEFT LOW PARAPHRASE BRACKET +2E20 ; Pi # LEFT VERTICAL BAR WITH QUILL + +# Total code points: 12 + +# ================================================ + +# General_Category=Final_Punctuation + +00BB ; Pf # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2019 ; Pf # RIGHT SINGLE QUOTATION MARK +201D ; Pf # RIGHT DOUBLE QUOTATION MARK +203A ; Pf # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E03 ; Pf # RIGHT SUBSTITUTION BRACKET +2E05 ; Pf # RIGHT DOTTED SUBSTITUTION BRACKET +2E0A ; Pf # RIGHT TRANSPOSITION BRACKET +2E0D ; Pf # RIGHT RAISED OMISSION BRACKET +2E1D ; Pf # RIGHT LOW PARAPHRASE BRACKET +2E21 ; Pf # RIGHT VERTICAL BAR WITH QUILL + +# Total code points: 10 + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/GraphemeBreakProperty.txt b/3rd/pcre2/maint/Unicode.tables/GraphemeBreakProperty.txt new file mode 100644 index 00000000..a863397d --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/GraphemeBreakProperty.txt @@ -0,0 +1,1503 @@ +# GraphemeBreakProperty-16.0.0.txt +# Date: 2024-05-31, 18:09:38 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +# Property: Grapheme_Cluster_Break + +# All code points not explicitly listed for Grapheme_Cluster_Break +# have the value Other (XX). 
+ +# @missing: 0000..10FFFF; Other + +# ================================================ + +0600..0605 ; Prepend # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepend # Cf ARABIC END OF AYAH +070F ; Prepend # Cf SYRIAC ABBREVIATION MARK +0890..0891 ; Prepend # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; Prepend # Cf ARABIC DISPUTED END OF AYAH +0D4E ; Prepend # Lo MALAYALAM LETTER DOT REPH +110BD ; Prepend # Cf KAITHI NUMBER SIGN +110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE +111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA +113D1 ; Prepend # Lo TULU-TIGALARI REPHA +1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; Prepend # Lo DIVES AKURU INITIAL RA +11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA +11D46 ; Prepend # Lo MASARAM GONDI REPHA +11F02 ; Prepend # Lo KAWI SIGN REPHA + +# Total code points: 28 + +# ================================================ + +000D ; CR # Cc + +# Total code points: 1 + +# ================================================ + +000A ; LF # Cc + +# Total code points: 1 + +# ================================================ + +0000..0009 ; Control # Cc [10] .. +000B..000C ; Control # Cc [2] .. +000E..001F ; Control # Cc [18] .. +007F..009F ; Control # Cc [33] .. 
+00AD ; Control # Cf SOFT HYPHEN +061C ; Control # Cf ARABIC LETTER MARK +180E ; Control # Cf MONGOLIAN VOWEL SEPARATOR +200B ; Control # Cf ZERO WIDTH SPACE +200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Control # Zl LINE SEPARATOR +2029 ; Control # Zp PARAGRAPH SEPARATOR +202A..202E ; Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; Control # Cn +2066..206F ; Control # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; Control # Cn [9] .. +FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +13430..1343F ; Control # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Control # Cn +E0001 ; Control # Cf LANGUAGE TAG +E0002..E001F ; Control # Cn [30] .. +E0080..E00FF ; Control # Cn [128] .. +E01F0..E0FFF ; Control # Cn [3600] .. 
+ +# Total code points: 3893 + +# ================================================ + +0300..036F ; Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Extend # Mn NKO DANTAYALAN +0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC 
HALF MADDA OVER MADDA +08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE +093C ; Extend # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Extend # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Extend # Mn BENGALI SIGN CANDRABINDU +09BC ; Extend # Mn BENGALI SIGN NUKTA +09BE ; Extend # Mc BENGALI VOWEL SIGN AA +09C1..09C4 ; Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Extend # Mn BENGALI SIGN VIRAMA +09D7 ; Extend # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Extend # Mn BENGALI SANDHI MARK +0A01..0A02 ; Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Extend # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Extend # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Extend # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Extend # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Extend # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 
; Extend # Mn ORIYA SIGN CANDRABINDU +0B3C ; Extend # Mn ORIYA SIGN NUKTA +0B3E ; Extend # Mc ORIYA VOWEL SIGN AA +0B3F ; Extend # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; Extend # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Extend # Mn TAMIL SIGN ANUSVARA +0BBE ; Extend # Mc TAMIL VOWEL SIGN AA +0BC0 ; Extend # Mn TAMIL VOWEL SIGN II +0BCD ; Extend # Mn TAMIL SIGN VIRAMA +0BD7 ; Extend # Mc TAMIL AU LENGTH MARK +0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Extend # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU +0CBC ; Extend # Mn KANNADA SIGN NUKTA +0CBF ; Extend # Mn KANNADA VOWEL SIGN I +0CC0 ; Extend # Mc KANNADA VOWEL SIGN II +0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU +0CC6 ; Extend # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR 
VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA +0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA +0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU +0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA +0DCF ; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DDF ; Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +0E31 ; Extend # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Extend # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECE ; Extend # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0F18..0F19 ; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Extend # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Extend # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Extend # Mn [2] MYANMAR 
SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; Extend # Mc TAGALOG SIGN PAMUDPOD +1732..1733 ; Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; Extend # Mc HANUNOO SIGN PAMUDPOD +1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Extend # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Extend # Mn KHMER SIGN ATTHACAN +180B..180D ; Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Extend # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; 
Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Extend # Mn BUGINESE VOWEL SIGN AE +1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Extend # Mn TAI THAM SIGN SAKOT +1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Extend # Mn BALINESE SIGN REREKAN +1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B3D ; Extend # Mc BALINESE VOWEL SIGN LA LENGA TEDUNG +1B42 ; Extend # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B6B..1B73 ; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Extend # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; Extend # Mn [2] 
BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Extend # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; Extend # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Extend # Cf ZERO WIDTH NON-JOINER +20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Extend # Mn COMBINING 
CYRILLIC VZMET +A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A953 ; Extend # Mc REJANG VIRAMA +A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9C0 ; Extend # Mc JAVANESE PANGKON +A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; Extend # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET 
VOWEL IA +AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA +ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Extend # Mn MEETEI MAYEK APUN IYEK +FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Extend # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Extend # Mn BRAHMI SIGN 
ANUSVARA +11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Extend # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Extend # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C0 ; Extend # Mc SHARADA SIGN VIRAMA +111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Extend # Mn KHOJKI SIGN ANUSVARA +11235 ; Extend # Mc KHOJKI SIGN VIRAMA +11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Extend # Mn KHOJKI SIGN SUKUN +11241 ; Extend # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133E ; Extend # Mc GRANTHA VOWEL SIGN AA +11340 ; Extend # Mn GRANTHA VOWEL SIGN II +1134D ; Extend # Mc GRANTHA SIGN VIRAMA +11357 ; Extend # Mc GRANTHA AU LENGTH MARK +11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Extend # Mn [5] 
COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113B8 ; Extend # Mc TULU-TIGALARI VOWEL SIGN AA +113BB..113C0 ; Extend # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; Extend # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK +113CE ; Extend # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Extend # Mn TULU-TIGALARI CONJOINER +113D2 ; Extend # Mn TULU-TIGALARI GEMINATION MARK +113E1..113E2 ; Extend # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11438..1143F ; Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Extend # Mn NEWA SIGN NUKTA +1145E ; Extend # Mn NEWA SANDHI MARK +114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA +114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BD ; Extend # Mc TIRHUTA VOWEL SIGN SHORT O +114BF..114C0 ; Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF ; Extend # Mc SIDDHAM VOWEL SIGN AA +115B2..115B5 ; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Extend # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Extend # Mn MODI SIGN ANUSVARA +1163F..11640 ; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; 
Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Extend # Mn TAKRI SIGN NUKTA +1171D ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LA +1171F ; Extend # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +11930 ; Extend # Mc DIVES AKURU VOWEL SIGN AA +1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; Extend # Mc DIVES AKURU SIGN HALANTA +1193E ; Extend # Mn DIVES AKURU VIRAMA +11943 ; Extend # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Extend # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Extend # Mn [7] MARCHEN SUBJOINED LETTER 
RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Extend # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Extend # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; Extend # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11F00..11F01 ; Extend # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F36..11F3A ; Extend # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F40 ; Extend # Mn KAWI VOWEL SIGN EU +11F41 ; Extend # Mc KAWI SIGN KILLER +11F42 ; Extend # Mn KAWI CONJOINER +11F5A ; Extend # Mn KAWI SIGN NUKTA +13440 ; Extend # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13447..13455 ; Extend # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1611E..16129 ; Extend # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612D..1612F ; Extend # Mn [3] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 
+1CF00..1CF2D ; Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Extend # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Extend # Mn COMBINING CYRILLIC SMALL LETTER 
BYELORUSSIAN-UKRAINIAN I +1E130..1E136 ; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Extend # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E4EC..1E4EF ; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E5EE..1E5EF ; Extend # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1F3FB..1F3FF ; Extend # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2198 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# ================================================ + +0903 ; SpacingMark # Mc DEVANAGARI SIGN VISARGA +093B ; SpacingMark # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; SpacingMark # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; SpacingMark # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; SpacingMark # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0982..0983 ; SpacingMark # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BF..09C0 ; SpacingMark # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II +09C7..09C8 ; SpacingMark # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; SpacingMark # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +0A03 ; SpacingMark # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; SpacingMark # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A83 ; SpacingMark # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; SpacingMark # Mc [3] GUJARATI VOWEL SIGN 
AA..GUJARATI VOWEL SIGN II +0AC9 ; SpacingMark # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; SpacingMark # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0B02..0B03 ; SpacingMark # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B40 ; SpacingMark # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; SpacingMark # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; SpacingMark # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0BBF ; SpacingMark # Mc TAMIL VOWEL SIGN I +0BC1..0BC2 ; SpacingMark # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; SpacingMark # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; SpacingMark # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0C01..0C03 ; SpacingMark # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C41..0C44 ; SpacingMark # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C82..0C83 ; SpacingMark # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; SpacingMark # Mc KANNADA VOWEL SIGN AA +0CC1 ; SpacingMark # Mc KANNADA VOWEL SIGN U +0CC3..0CC4 ; SpacingMark # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR +0CF3 ; SpacingMark # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D02..0D03 ; SpacingMark # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3F..0D40 ; SpacingMark # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II +0D46..0D48 ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; SpacingMark # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D82..0D83 ; SpacingMark # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DD0..0DD1 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDE ; SpacingMark # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA +0DF2..0DF3 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E33 ; SpacingMark # Lo THAI CHARACTER SARA AM +0EB3 ; 
SpacingMark # Lo LAO VOWEL SIGN AM +0F3E..0F3F ; SpacingMark # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F7F ; SpacingMark # Mc TIBETAN SIGN RNAM BCAD +1031 ; SpacingMark # Mc MYANMAR VOWEL SIGN E +103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E +17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1923..1926 ; SpacingMark # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; SpacingMark # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; SpacingMark # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; SpacingMark # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A19..1A1A ; SpacingMark # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A55 ; SpacingMark # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1B04 ; SpacingMark # Mc BALINESE SIGN BISAH +1B3E..1B41 ; SpacingMark # Mc [4] BALINESE VOWEL SIGN TALING..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B82 ; SpacingMark # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U +1C24..1C2B ; SpacingMark # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CE1 ; SpacingMark # Mc VEDIC 
TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CF7 ; SpacingMark # Mc VEDIC SIGN ATIKRAMA +A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; SpacingMark # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A952 ; SpacingMark # Mc REJANG CONSONANT SIGN H +A983 ; SpacingMark # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; SpacingMark # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9BF ; SpacingMark # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H +AAEB ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK +11000 ; SpacingMark # Mc BRAHMI SIGN CANDRABINDU +11002 ; SpacingMark # Mc BRAHMI SIGN VISARGA +11082 ; SpacingMark # Mc KAITHI SIGN VISARGA +110B0..110B2 ; SpacingMark # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; SpacingMark # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; SpacingMark # Mc CHAKMA VOWEL SIGN E +11145..11146 ; SpacingMark # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11182 ; SpacingMark # Mc SHARADA SIGN VISARGA 
+111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF ; SpacingMark # Mc SHARADA VOWEL SIGN AU +111CE ; SpacingMark # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +112E0..112E2 ; SpacingMark # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +11302..11303 ; SpacingMark # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133F ; SpacingMark # Mc GRANTHA VOWEL SIGN I +11341..11344 ; SpacingMark # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B9..113BA ; SpacingMark # Mc [2] TULU-TIGALARI VOWEL SIGN I..TULU-TIGALARI VOWEL SIGN II +113CA ; SpacingMark # Mc TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; SpacingMark # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +11435..11437 ; SpacingMark # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; SpacingMark # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; SpacingMark # Mc NEWA SIGN VISARGA +114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II +114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E +114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O +114BE ; SpacingMark # Mc TIRHUTA VOWEL SIGN AU +114C1 ; SpacingMark # Mc TIRHUTA SIGN VISARGA +115B0..115B1 ; SpacingMark # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II +115B8..115BB ; SpacingMark # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; SpacingMark # Mc SIDDHAM SIGN VISARGA +11630..11632 ; SpacingMark # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; SpacingMark # Mc [2] MODI VOWEL SIGN 
O..MODI VOWEL SIGN AU +1163E ; SpacingMark # Mc MODI SIGN VISARGA +116AC ; SpacingMark # Mc TAKRI SIGN VISARGA +116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +1171E ; SpacingMark # Mc AHOM CONSONANT SIGN MEDIAL RA +11726 ; SpacingMark # Mc AHOM VOWEL SIGN E +1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; SpacingMark # Mc DOGRA SIGN VISARGA +11931..11935 ; SpacingMark # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E +11937..11938 ; SpacingMark # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +11940 ; SpacingMark # Mc DIVES AKURU MEDIAL YA +11942 ; SpacingMark # Mc DIVES AKURU MEDIAL RA +119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; SpacingMark # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E4 ; SpacingMark # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A39 ; SpacingMark # Mc ZANABAZAR SQUARE SIGN VISARGA +11A57..11A58 ; SpacingMark # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A97 ; SpacingMark # Mc SOYOMBO SIGN VISARGA +11C2F ; SpacingMark # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; SpacingMark # Mc BHAIKSUKI SIGN VISARGA +11CA9 ; SpacingMark # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; SpacingMark # Mc MARCHEN VOWEL SIGN I +11CB4 ; SpacingMark # Mc MARCHEN VOWEL SIGN O +11D8A..11D8E ; SpacingMark # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; SpacingMark # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA +11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F03 ; SpacingMark # Mc KAWI SIGN VISARGA +11F34..11F35 ; SpacingMark # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F3E..11F3F ; SpacingMark # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 
+16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI + +# Total code points: 378 + +# ================================================ + +1100..115F ; L # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER +A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH + +# Total code points: 125 + +# ================================================ + +1160..11A7 ; V # Lo [72] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG O-YAE +D7B0..D7C6 ; V # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +16D63 ; V # Lo KIRAT RAI VOWEL SIGN AA +16D67..16D6A ; V # Lo [4] KIRAT RAI VOWEL SIGN E..KIRAT RAI VOWEL SIGN AU + +# Total code points: 100 + +# ================================================ + +11A8..11FF ; T # Lo [88] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN +D7CB..D7FB ; T # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH + +# Total code points: 137 + +# ================================================ + +AC00 ; LV # Lo HANGUL SYLLABLE GA +AC1C ; LV # Lo HANGUL SYLLABLE GAE +AC38 ; LV # Lo HANGUL SYLLABLE GYA +AC54 ; LV # Lo HANGUL SYLLABLE GYAE +AC70 ; LV # Lo HANGUL SYLLABLE GEO +AC8C ; LV # Lo HANGUL SYLLABLE GE +ACA8 ; LV # Lo HANGUL SYLLABLE GYEO +ACC4 ; LV # Lo HANGUL SYLLABLE GYE +ACE0 ; LV # Lo HANGUL SYLLABLE GO +ACFC ; LV # Lo HANGUL SYLLABLE GWA +AD18 ; LV # Lo HANGUL SYLLABLE GWAE +AD34 ; LV # Lo HANGUL SYLLABLE GOE +AD50 ; LV # Lo HANGUL SYLLABLE GYO +AD6C ; LV # Lo HANGUL SYLLABLE GU +AD88 ; LV # Lo HANGUL SYLLABLE GWEO +ADA4 ; LV # Lo HANGUL SYLLABLE GWE +ADC0 ; LV # Lo HANGUL SYLLABLE GWI +ADDC ; LV # Lo HANGUL SYLLABLE GYU +ADF8 ; LV # Lo HANGUL SYLLABLE GEU +AE14 ; LV # Lo HANGUL SYLLABLE GYI +AE30 ; LV # Lo HANGUL SYLLABLE GI +AE4C ; LV # Lo HANGUL SYLLABLE GGA +AE68 ; LV # Lo HANGUL SYLLABLE GGAE +AE84 ; LV # Lo HANGUL SYLLABLE GGYA +AEA0 ; LV # Lo HANGUL SYLLABLE GGYAE +AEBC ; LV # Lo HANGUL SYLLABLE GGEO +AED8 ; LV # Lo HANGUL SYLLABLE GGE +AEF4 ; LV # 
Lo HANGUL SYLLABLE GGYEO +AF10 ; LV # Lo HANGUL SYLLABLE GGYE +AF2C ; LV # Lo HANGUL SYLLABLE GGO +AF48 ; LV # Lo HANGUL SYLLABLE GGWA +AF64 ; LV # Lo HANGUL SYLLABLE GGWAE +AF80 ; LV # Lo HANGUL SYLLABLE GGOE +AF9C ; LV # Lo HANGUL SYLLABLE GGYO +AFB8 ; LV # Lo HANGUL SYLLABLE GGU +AFD4 ; LV # Lo HANGUL SYLLABLE GGWEO +AFF0 ; LV # Lo HANGUL SYLLABLE GGWE +B00C ; LV # Lo HANGUL SYLLABLE GGWI +B028 ; LV # Lo HANGUL SYLLABLE GGYU +B044 ; LV # Lo HANGUL SYLLABLE GGEU +B060 ; LV # Lo HANGUL SYLLABLE GGYI +B07C ; LV # Lo HANGUL SYLLABLE GGI +B098 ; LV # Lo HANGUL SYLLABLE NA +B0B4 ; LV # Lo HANGUL SYLLABLE NAE +B0D0 ; LV # Lo HANGUL SYLLABLE NYA +B0EC ; LV # Lo HANGUL SYLLABLE NYAE +B108 ; LV # Lo HANGUL SYLLABLE NEO +B124 ; LV # Lo HANGUL SYLLABLE NE +B140 ; LV # Lo HANGUL SYLLABLE NYEO +B15C ; LV # Lo HANGUL SYLLABLE NYE +B178 ; LV # Lo HANGUL SYLLABLE NO +B194 ; LV # Lo HANGUL SYLLABLE NWA +B1B0 ; LV # Lo HANGUL SYLLABLE NWAE +B1CC ; LV # Lo HANGUL SYLLABLE NOE +B1E8 ; LV # Lo HANGUL SYLLABLE NYO +B204 ; LV # Lo HANGUL SYLLABLE NU +B220 ; LV # Lo HANGUL SYLLABLE NWEO +B23C ; LV # Lo HANGUL SYLLABLE NWE +B258 ; LV # Lo HANGUL SYLLABLE NWI +B274 ; LV # Lo HANGUL SYLLABLE NYU +B290 ; LV # Lo HANGUL SYLLABLE NEU +B2AC ; LV # Lo HANGUL SYLLABLE NYI +B2C8 ; LV # Lo HANGUL SYLLABLE NI +B2E4 ; LV # Lo HANGUL SYLLABLE DA +B300 ; LV # Lo HANGUL SYLLABLE DAE +B31C ; LV # Lo HANGUL SYLLABLE DYA +B338 ; LV # Lo HANGUL SYLLABLE DYAE +B354 ; LV # Lo HANGUL SYLLABLE DEO +B370 ; LV # Lo HANGUL SYLLABLE DE +B38C ; LV # Lo HANGUL SYLLABLE DYEO +B3A8 ; LV # Lo HANGUL SYLLABLE DYE +B3C4 ; LV # Lo HANGUL SYLLABLE DO +B3E0 ; LV # Lo HANGUL SYLLABLE DWA +B3FC ; LV # Lo HANGUL SYLLABLE DWAE +B418 ; LV # Lo HANGUL SYLLABLE DOE +B434 ; LV # Lo HANGUL SYLLABLE DYO +B450 ; LV # Lo HANGUL SYLLABLE DU +B46C ; LV # Lo HANGUL SYLLABLE DWEO +B488 ; LV # Lo HANGUL SYLLABLE DWE +B4A4 ; LV # Lo HANGUL SYLLABLE DWI +B4C0 ; LV # Lo HANGUL SYLLABLE DYU +B4DC ; LV # Lo HANGUL SYLLABLE DEU +B4F8 ; LV # Lo 
HANGUL SYLLABLE DYI +B514 ; LV # Lo HANGUL SYLLABLE DI +B530 ; LV # Lo HANGUL SYLLABLE DDA +B54C ; LV # Lo HANGUL SYLLABLE DDAE +B568 ; LV # Lo HANGUL SYLLABLE DDYA +B584 ; LV # Lo HANGUL SYLLABLE DDYAE +B5A0 ; LV # Lo HANGUL SYLLABLE DDEO +B5BC ; LV # Lo HANGUL SYLLABLE DDE +B5D8 ; LV # Lo HANGUL SYLLABLE DDYEO +B5F4 ; LV # Lo HANGUL SYLLABLE DDYE +B610 ; LV # Lo HANGUL SYLLABLE DDO +B62C ; LV # Lo HANGUL SYLLABLE DDWA +B648 ; LV # Lo HANGUL SYLLABLE DDWAE +B664 ; LV # Lo HANGUL SYLLABLE DDOE +B680 ; LV # Lo HANGUL SYLLABLE DDYO +B69C ; LV # Lo HANGUL SYLLABLE DDU +B6B8 ; LV # Lo HANGUL SYLLABLE DDWEO +B6D4 ; LV # Lo HANGUL SYLLABLE DDWE +B6F0 ; LV # Lo HANGUL SYLLABLE DDWI +B70C ; LV # Lo HANGUL SYLLABLE DDYU +B728 ; LV # Lo HANGUL SYLLABLE DDEU +B744 ; LV # Lo HANGUL SYLLABLE DDYI +B760 ; LV # Lo HANGUL SYLLABLE DDI +B77C ; LV # Lo HANGUL SYLLABLE RA +B798 ; LV # Lo HANGUL SYLLABLE RAE +B7B4 ; LV # Lo HANGUL SYLLABLE RYA +B7D0 ; LV # Lo HANGUL SYLLABLE RYAE +B7EC ; LV # Lo HANGUL SYLLABLE REO +B808 ; LV # Lo HANGUL SYLLABLE RE +B824 ; LV # Lo HANGUL SYLLABLE RYEO +B840 ; LV # Lo HANGUL SYLLABLE RYE +B85C ; LV # Lo HANGUL SYLLABLE RO +B878 ; LV # Lo HANGUL SYLLABLE RWA +B894 ; LV # Lo HANGUL SYLLABLE RWAE +B8B0 ; LV # Lo HANGUL SYLLABLE ROE +B8CC ; LV # Lo HANGUL SYLLABLE RYO +B8E8 ; LV # Lo HANGUL SYLLABLE RU +B904 ; LV # Lo HANGUL SYLLABLE RWEO +B920 ; LV # Lo HANGUL SYLLABLE RWE +B93C ; LV # Lo HANGUL SYLLABLE RWI +B958 ; LV # Lo HANGUL SYLLABLE RYU +B974 ; LV # Lo HANGUL SYLLABLE REU +B990 ; LV # Lo HANGUL SYLLABLE RYI +B9AC ; LV # Lo HANGUL SYLLABLE RI +B9C8 ; LV # Lo HANGUL SYLLABLE MA +B9E4 ; LV # Lo HANGUL SYLLABLE MAE +BA00 ; LV # Lo HANGUL SYLLABLE MYA +BA1C ; LV # Lo HANGUL SYLLABLE MYAE +BA38 ; LV # Lo HANGUL SYLLABLE MEO +BA54 ; LV # Lo HANGUL SYLLABLE ME +BA70 ; LV # Lo HANGUL SYLLABLE MYEO +BA8C ; LV # Lo HANGUL SYLLABLE MYE +BAA8 ; LV # Lo HANGUL SYLLABLE MO +BAC4 ; LV # Lo HANGUL SYLLABLE MWA +BAE0 ; LV # Lo HANGUL SYLLABLE MWAE +BAFC ; LV # Lo 
HANGUL SYLLABLE MOE +BB18 ; LV # Lo HANGUL SYLLABLE MYO +BB34 ; LV # Lo HANGUL SYLLABLE MU +BB50 ; LV # Lo HANGUL SYLLABLE MWEO +BB6C ; LV # Lo HANGUL SYLLABLE MWE +BB88 ; LV # Lo HANGUL SYLLABLE MWI +BBA4 ; LV # Lo HANGUL SYLLABLE MYU +BBC0 ; LV # Lo HANGUL SYLLABLE MEU +BBDC ; LV # Lo HANGUL SYLLABLE MYI +BBF8 ; LV # Lo HANGUL SYLLABLE MI +BC14 ; LV # Lo HANGUL SYLLABLE BA +BC30 ; LV # Lo HANGUL SYLLABLE BAE +BC4C ; LV # Lo HANGUL SYLLABLE BYA +BC68 ; LV # Lo HANGUL SYLLABLE BYAE +BC84 ; LV # Lo HANGUL SYLLABLE BEO +BCA0 ; LV # Lo HANGUL SYLLABLE BE +BCBC ; LV # Lo HANGUL SYLLABLE BYEO +BCD8 ; LV # Lo HANGUL SYLLABLE BYE +BCF4 ; LV # Lo HANGUL SYLLABLE BO +BD10 ; LV # Lo HANGUL SYLLABLE BWA +BD2C ; LV # Lo HANGUL SYLLABLE BWAE +BD48 ; LV # Lo HANGUL SYLLABLE BOE +BD64 ; LV # Lo HANGUL SYLLABLE BYO +BD80 ; LV # Lo HANGUL SYLLABLE BU +BD9C ; LV # Lo HANGUL SYLLABLE BWEO +BDB8 ; LV # Lo HANGUL SYLLABLE BWE +BDD4 ; LV # Lo HANGUL SYLLABLE BWI +BDF0 ; LV # Lo HANGUL SYLLABLE BYU +BE0C ; LV # Lo HANGUL SYLLABLE BEU +BE28 ; LV # Lo HANGUL SYLLABLE BYI +BE44 ; LV # Lo HANGUL SYLLABLE BI +BE60 ; LV # Lo HANGUL SYLLABLE BBA +BE7C ; LV # Lo HANGUL SYLLABLE BBAE +BE98 ; LV # Lo HANGUL SYLLABLE BBYA +BEB4 ; LV # Lo HANGUL SYLLABLE BBYAE +BED0 ; LV # Lo HANGUL SYLLABLE BBEO +BEEC ; LV # Lo HANGUL SYLLABLE BBE +BF08 ; LV # Lo HANGUL SYLLABLE BBYEO +BF24 ; LV # Lo HANGUL SYLLABLE BBYE +BF40 ; LV # Lo HANGUL SYLLABLE BBO +BF5C ; LV # Lo HANGUL SYLLABLE BBWA +BF78 ; LV # Lo HANGUL SYLLABLE BBWAE +BF94 ; LV # Lo HANGUL SYLLABLE BBOE +BFB0 ; LV # Lo HANGUL SYLLABLE BBYO +BFCC ; LV # Lo HANGUL SYLLABLE BBU +BFE8 ; LV # Lo HANGUL SYLLABLE BBWEO +C004 ; LV # Lo HANGUL SYLLABLE BBWE +C020 ; LV # Lo HANGUL SYLLABLE BBWI +C03C ; LV # Lo HANGUL SYLLABLE BBYU +C058 ; LV # Lo HANGUL SYLLABLE BBEU +C074 ; LV # Lo HANGUL SYLLABLE BBYI +C090 ; LV # Lo HANGUL SYLLABLE BBI +C0AC ; LV # Lo HANGUL SYLLABLE SA +C0C8 ; LV # Lo HANGUL SYLLABLE SAE +C0E4 ; LV # Lo HANGUL SYLLABLE SYA +C100 ; LV # Lo 
HANGUL SYLLABLE SYAE +C11C ; LV # Lo HANGUL SYLLABLE SEO +C138 ; LV # Lo HANGUL SYLLABLE SE +C154 ; LV # Lo HANGUL SYLLABLE SYEO +C170 ; LV # Lo HANGUL SYLLABLE SYE +C18C ; LV # Lo HANGUL SYLLABLE SO +C1A8 ; LV # Lo HANGUL SYLLABLE SWA +C1C4 ; LV # Lo HANGUL SYLLABLE SWAE +C1E0 ; LV # Lo HANGUL SYLLABLE SOE +C1FC ; LV # Lo HANGUL SYLLABLE SYO +C218 ; LV # Lo HANGUL SYLLABLE SU +C234 ; LV # Lo HANGUL SYLLABLE SWEO +C250 ; LV # Lo HANGUL SYLLABLE SWE +C26C ; LV # Lo HANGUL SYLLABLE SWI +C288 ; LV # Lo HANGUL SYLLABLE SYU +C2A4 ; LV # Lo HANGUL SYLLABLE SEU +C2C0 ; LV # Lo HANGUL SYLLABLE SYI +C2DC ; LV # Lo HANGUL SYLLABLE SI +C2F8 ; LV # Lo HANGUL SYLLABLE SSA +C314 ; LV # Lo HANGUL SYLLABLE SSAE +C330 ; LV # Lo HANGUL SYLLABLE SSYA +C34C ; LV # Lo HANGUL SYLLABLE SSYAE +C368 ; LV # Lo HANGUL SYLLABLE SSEO +C384 ; LV # Lo HANGUL SYLLABLE SSE +C3A0 ; LV # Lo HANGUL SYLLABLE SSYEO +C3BC ; LV # Lo HANGUL SYLLABLE SSYE +C3D8 ; LV # Lo HANGUL SYLLABLE SSO +C3F4 ; LV # Lo HANGUL SYLLABLE SSWA +C410 ; LV # Lo HANGUL SYLLABLE SSWAE +C42C ; LV # Lo HANGUL SYLLABLE SSOE +C448 ; LV # Lo HANGUL SYLLABLE SSYO +C464 ; LV # Lo HANGUL SYLLABLE SSU +C480 ; LV # Lo HANGUL SYLLABLE SSWEO +C49C ; LV # Lo HANGUL SYLLABLE SSWE +C4B8 ; LV # Lo HANGUL SYLLABLE SSWI +C4D4 ; LV # Lo HANGUL SYLLABLE SSYU +C4F0 ; LV # Lo HANGUL SYLLABLE SSEU +C50C ; LV # Lo HANGUL SYLLABLE SSYI +C528 ; LV # Lo HANGUL SYLLABLE SSI +C544 ; LV # Lo HANGUL SYLLABLE A +C560 ; LV # Lo HANGUL SYLLABLE AE +C57C ; LV # Lo HANGUL SYLLABLE YA +C598 ; LV # Lo HANGUL SYLLABLE YAE +C5B4 ; LV # Lo HANGUL SYLLABLE EO +C5D0 ; LV # Lo HANGUL SYLLABLE E +C5EC ; LV # Lo HANGUL SYLLABLE YEO +C608 ; LV # Lo HANGUL SYLLABLE YE +C624 ; LV # Lo HANGUL SYLLABLE O +C640 ; LV # Lo HANGUL SYLLABLE WA +C65C ; LV # Lo HANGUL SYLLABLE WAE +C678 ; LV # Lo HANGUL SYLLABLE OE +C694 ; LV # Lo HANGUL SYLLABLE YO +C6B0 ; LV # Lo HANGUL SYLLABLE U +C6CC ; LV # Lo HANGUL SYLLABLE WEO +C6E8 ; LV # Lo HANGUL SYLLABLE WE +C704 ; LV # Lo HANGUL SYLLABLE 
WI +C720 ; LV # Lo HANGUL SYLLABLE YU +C73C ; LV # Lo HANGUL SYLLABLE EU +C758 ; LV # Lo HANGUL SYLLABLE YI +C774 ; LV # Lo HANGUL SYLLABLE I +C790 ; LV # Lo HANGUL SYLLABLE JA +C7AC ; LV # Lo HANGUL SYLLABLE JAE +C7C8 ; LV # Lo HANGUL SYLLABLE JYA +C7E4 ; LV # Lo HANGUL SYLLABLE JYAE +C800 ; LV # Lo HANGUL SYLLABLE JEO +C81C ; LV # Lo HANGUL SYLLABLE JE +C838 ; LV # Lo HANGUL SYLLABLE JYEO +C854 ; LV # Lo HANGUL SYLLABLE JYE +C870 ; LV # Lo HANGUL SYLLABLE JO +C88C ; LV # Lo HANGUL SYLLABLE JWA +C8A8 ; LV # Lo HANGUL SYLLABLE JWAE +C8C4 ; LV # Lo HANGUL SYLLABLE JOE +C8E0 ; LV # Lo HANGUL SYLLABLE JYO +C8FC ; LV # Lo HANGUL SYLLABLE JU +C918 ; LV # Lo HANGUL SYLLABLE JWEO +C934 ; LV # Lo HANGUL SYLLABLE JWE +C950 ; LV # Lo HANGUL SYLLABLE JWI +C96C ; LV # Lo HANGUL SYLLABLE JYU +C988 ; LV # Lo HANGUL SYLLABLE JEU +C9A4 ; LV # Lo HANGUL SYLLABLE JYI +C9C0 ; LV # Lo HANGUL SYLLABLE JI +C9DC ; LV # Lo HANGUL SYLLABLE JJA +C9F8 ; LV # Lo HANGUL SYLLABLE JJAE +CA14 ; LV # Lo HANGUL SYLLABLE JJYA +CA30 ; LV # Lo HANGUL SYLLABLE JJYAE +CA4C ; LV # Lo HANGUL SYLLABLE JJEO +CA68 ; LV # Lo HANGUL SYLLABLE JJE +CA84 ; LV # Lo HANGUL SYLLABLE JJYEO +CAA0 ; LV # Lo HANGUL SYLLABLE JJYE +CABC ; LV # Lo HANGUL SYLLABLE JJO +CAD8 ; LV # Lo HANGUL SYLLABLE JJWA +CAF4 ; LV # Lo HANGUL SYLLABLE JJWAE +CB10 ; LV # Lo HANGUL SYLLABLE JJOE +CB2C ; LV # Lo HANGUL SYLLABLE JJYO +CB48 ; LV # Lo HANGUL SYLLABLE JJU +CB64 ; LV # Lo HANGUL SYLLABLE JJWEO +CB80 ; LV # Lo HANGUL SYLLABLE JJWE +CB9C ; LV # Lo HANGUL SYLLABLE JJWI +CBB8 ; LV # Lo HANGUL SYLLABLE JJYU +CBD4 ; LV # Lo HANGUL SYLLABLE JJEU +CBF0 ; LV # Lo HANGUL SYLLABLE JJYI +CC0C ; LV # Lo HANGUL SYLLABLE JJI +CC28 ; LV # Lo HANGUL SYLLABLE CA +CC44 ; LV # Lo HANGUL SYLLABLE CAE +CC60 ; LV # Lo HANGUL SYLLABLE CYA +CC7C ; LV # Lo HANGUL SYLLABLE CYAE +CC98 ; LV # Lo HANGUL SYLLABLE CEO +CCB4 ; LV # Lo HANGUL SYLLABLE CE +CCD0 ; LV # Lo HANGUL SYLLABLE CYEO +CCEC ; LV # Lo HANGUL SYLLABLE CYE +CD08 ; LV # Lo HANGUL SYLLABLE CO 
+CD24 ; LV # Lo HANGUL SYLLABLE CWA +CD40 ; LV # Lo HANGUL SYLLABLE CWAE +CD5C ; LV # Lo HANGUL SYLLABLE COE +CD78 ; LV # Lo HANGUL SYLLABLE CYO +CD94 ; LV # Lo HANGUL SYLLABLE CU +CDB0 ; LV # Lo HANGUL SYLLABLE CWEO +CDCC ; LV # Lo HANGUL SYLLABLE CWE +CDE8 ; LV # Lo HANGUL SYLLABLE CWI +CE04 ; LV # Lo HANGUL SYLLABLE CYU +CE20 ; LV # Lo HANGUL SYLLABLE CEU +CE3C ; LV # Lo HANGUL SYLLABLE CYI +CE58 ; LV # Lo HANGUL SYLLABLE CI +CE74 ; LV # Lo HANGUL SYLLABLE KA +CE90 ; LV # Lo HANGUL SYLLABLE KAE +CEAC ; LV # Lo HANGUL SYLLABLE KYA +CEC8 ; LV # Lo HANGUL SYLLABLE KYAE +CEE4 ; LV # Lo HANGUL SYLLABLE KEO +CF00 ; LV # Lo HANGUL SYLLABLE KE +CF1C ; LV # Lo HANGUL SYLLABLE KYEO +CF38 ; LV # Lo HANGUL SYLLABLE KYE +CF54 ; LV # Lo HANGUL SYLLABLE KO +CF70 ; LV # Lo HANGUL SYLLABLE KWA +CF8C ; LV # Lo HANGUL SYLLABLE KWAE +CFA8 ; LV # Lo HANGUL SYLLABLE KOE +CFC4 ; LV # Lo HANGUL SYLLABLE KYO +CFE0 ; LV # Lo HANGUL SYLLABLE KU +CFFC ; LV # Lo HANGUL SYLLABLE KWEO +D018 ; LV # Lo HANGUL SYLLABLE KWE +D034 ; LV # Lo HANGUL SYLLABLE KWI +D050 ; LV # Lo HANGUL SYLLABLE KYU +D06C ; LV # Lo HANGUL SYLLABLE KEU +D088 ; LV # Lo HANGUL SYLLABLE KYI +D0A4 ; LV # Lo HANGUL SYLLABLE KI +D0C0 ; LV # Lo HANGUL SYLLABLE TA +D0DC ; LV # Lo HANGUL SYLLABLE TAE +D0F8 ; LV # Lo HANGUL SYLLABLE TYA +D114 ; LV # Lo HANGUL SYLLABLE TYAE +D130 ; LV # Lo HANGUL SYLLABLE TEO +D14C ; LV # Lo HANGUL SYLLABLE TE +D168 ; LV # Lo HANGUL SYLLABLE TYEO +D184 ; LV # Lo HANGUL SYLLABLE TYE +D1A0 ; LV # Lo HANGUL SYLLABLE TO +D1BC ; LV # Lo HANGUL SYLLABLE TWA +D1D8 ; LV # Lo HANGUL SYLLABLE TWAE +D1F4 ; LV # Lo HANGUL SYLLABLE TOE +D210 ; LV # Lo HANGUL SYLLABLE TYO +D22C ; LV # Lo HANGUL SYLLABLE TU +D248 ; LV # Lo HANGUL SYLLABLE TWEO +D264 ; LV # Lo HANGUL SYLLABLE TWE +D280 ; LV # Lo HANGUL SYLLABLE TWI +D29C ; LV # Lo HANGUL SYLLABLE TYU +D2B8 ; LV # Lo HANGUL SYLLABLE TEU +D2D4 ; LV # Lo HANGUL SYLLABLE TYI +D2F0 ; LV # Lo HANGUL SYLLABLE TI +D30C ; LV # Lo HANGUL SYLLABLE PA +D328 ; LV # Lo HANGUL 
SYLLABLE PAE +D344 ; LV # Lo HANGUL SYLLABLE PYA +D360 ; LV # Lo HANGUL SYLLABLE PYAE +D37C ; LV # Lo HANGUL SYLLABLE PEO +D398 ; LV # Lo HANGUL SYLLABLE PE +D3B4 ; LV # Lo HANGUL SYLLABLE PYEO +D3D0 ; LV # Lo HANGUL SYLLABLE PYE +D3EC ; LV # Lo HANGUL SYLLABLE PO +D408 ; LV # Lo HANGUL SYLLABLE PWA +D424 ; LV # Lo HANGUL SYLLABLE PWAE +D440 ; LV # Lo HANGUL SYLLABLE POE +D45C ; LV # Lo HANGUL SYLLABLE PYO +D478 ; LV # Lo HANGUL SYLLABLE PU +D494 ; LV # Lo HANGUL SYLLABLE PWEO +D4B0 ; LV # Lo HANGUL SYLLABLE PWE +D4CC ; LV # Lo HANGUL SYLLABLE PWI +D4E8 ; LV # Lo HANGUL SYLLABLE PYU +D504 ; LV # Lo HANGUL SYLLABLE PEU +D520 ; LV # Lo HANGUL SYLLABLE PYI +D53C ; LV # Lo HANGUL SYLLABLE PI +D558 ; LV # Lo HANGUL SYLLABLE HA +D574 ; LV # Lo HANGUL SYLLABLE HAE +D590 ; LV # Lo HANGUL SYLLABLE HYA +D5AC ; LV # Lo HANGUL SYLLABLE HYAE +D5C8 ; LV # Lo HANGUL SYLLABLE HEO +D5E4 ; LV # Lo HANGUL SYLLABLE HE +D600 ; LV # Lo HANGUL SYLLABLE HYEO +D61C ; LV # Lo HANGUL SYLLABLE HYE +D638 ; LV # Lo HANGUL SYLLABLE HO +D654 ; LV # Lo HANGUL SYLLABLE HWA +D670 ; LV # Lo HANGUL SYLLABLE HWAE +D68C ; LV # Lo HANGUL SYLLABLE HOE +D6A8 ; LV # Lo HANGUL SYLLABLE HYO +D6C4 ; LV # Lo HANGUL SYLLABLE HU +D6E0 ; LV # Lo HANGUL SYLLABLE HWEO +D6FC ; LV # Lo HANGUL SYLLABLE HWE +D718 ; LV # Lo HANGUL SYLLABLE HWI +D734 ; LV # Lo HANGUL SYLLABLE HYU +D750 ; LV # Lo HANGUL SYLLABLE HEU +D76C ; LV # Lo HANGUL SYLLABLE HYI +D788 ; LV # Lo HANGUL SYLLABLE HI + +# Total code points: 399 + +# ================================================ + +AC01..AC1B ; LVT # Lo [27] HANGUL SYLLABLE GAG..HANGUL SYLLABLE GAH +AC1D..AC37 ; LVT # Lo [27] HANGUL SYLLABLE GAEG..HANGUL SYLLABLE GAEH +AC39..AC53 ; LVT # Lo [27] HANGUL SYLLABLE GYAG..HANGUL SYLLABLE GYAH +AC55..AC6F ; LVT # Lo [27] HANGUL SYLLABLE GYAEG..HANGUL SYLLABLE GYAEH +AC71..AC8B ; LVT # Lo [27] HANGUL SYLLABLE GEOG..HANGUL SYLLABLE GEOH +AC8D..ACA7 ; LVT # Lo [27] HANGUL SYLLABLE GEG..HANGUL SYLLABLE GEH +ACA9..ACC3 ; LVT # Lo [27] HANGUL 
SYLLABLE GYEOG..HANGUL SYLLABLE GYEOH +ACC5..ACDF ; LVT # Lo [27] HANGUL SYLLABLE GYEG..HANGUL SYLLABLE GYEH +ACE1..ACFB ; LVT # Lo [27] HANGUL SYLLABLE GOG..HANGUL SYLLABLE GOH +ACFD..AD17 ; LVT # Lo [27] HANGUL SYLLABLE GWAG..HANGUL SYLLABLE GWAH +AD19..AD33 ; LVT # Lo [27] HANGUL SYLLABLE GWAEG..HANGUL SYLLABLE GWAEH +AD35..AD4F ; LVT # Lo [27] HANGUL SYLLABLE GOEG..HANGUL SYLLABLE GOEH +AD51..AD6B ; LVT # Lo [27] HANGUL SYLLABLE GYOG..HANGUL SYLLABLE GYOH +AD6D..AD87 ; LVT # Lo [27] HANGUL SYLLABLE GUG..HANGUL SYLLABLE GUH +AD89..ADA3 ; LVT # Lo [27] HANGUL SYLLABLE GWEOG..HANGUL SYLLABLE GWEOH +ADA5..ADBF ; LVT # Lo [27] HANGUL SYLLABLE GWEG..HANGUL SYLLABLE GWEH +ADC1..ADDB ; LVT # Lo [27] HANGUL SYLLABLE GWIG..HANGUL SYLLABLE GWIH +ADDD..ADF7 ; LVT # Lo [27] HANGUL SYLLABLE GYUG..HANGUL SYLLABLE GYUH +ADF9..AE13 ; LVT # Lo [27] HANGUL SYLLABLE GEUG..HANGUL SYLLABLE GEUH +AE15..AE2F ; LVT # Lo [27] HANGUL SYLLABLE GYIG..HANGUL SYLLABLE GYIH +AE31..AE4B ; LVT # Lo [27] HANGUL SYLLABLE GIG..HANGUL SYLLABLE GIH +AE4D..AE67 ; LVT # Lo [27] HANGUL SYLLABLE GGAG..HANGUL SYLLABLE GGAH +AE69..AE83 ; LVT # Lo [27] HANGUL SYLLABLE GGAEG..HANGUL SYLLABLE GGAEH +AE85..AE9F ; LVT # Lo [27] HANGUL SYLLABLE GGYAG..HANGUL SYLLABLE GGYAH +AEA1..AEBB ; LVT # Lo [27] HANGUL SYLLABLE GGYAEG..HANGUL SYLLABLE GGYAEH +AEBD..AED7 ; LVT # Lo [27] HANGUL SYLLABLE GGEOG..HANGUL SYLLABLE GGEOH +AED9..AEF3 ; LVT # Lo [27] HANGUL SYLLABLE GGEG..HANGUL SYLLABLE GGEH +AEF5..AF0F ; LVT # Lo [27] HANGUL SYLLABLE GGYEOG..HANGUL SYLLABLE GGYEOH +AF11..AF2B ; LVT # Lo [27] HANGUL SYLLABLE GGYEG..HANGUL SYLLABLE GGYEH +AF2D..AF47 ; LVT # Lo [27] HANGUL SYLLABLE GGOG..HANGUL SYLLABLE GGOH +AF49..AF63 ; LVT # Lo [27] HANGUL SYLLABLE GGWAG..HANGUL SYLLABLE GGWAH +AF65..AF7F ; LVT # Lo [27] HANGUL SYLLABLE GGWAEG..HANGUL SYLLABLE GGWAEH +AF81..AF9B ; LVT # Lo [27] HANGUL SYLLABLE GGOEG..HANGUL SYLLABLE GGOEH +AF9D..AFB7 ; LVT # Lo [27] HANGUL SYLLABLE GGYOG..HANGUL SYLLABLE GGYOH +AFB9..AFD3 ; LVT # 
Lo [27] HANGUL SYLLABLE GGUG..HANGUL SYLLABLE GGUH +AFD5..AFEF ; LVT # Lo [27] HANGUL SYLLABLE GGWEOG..HANGUL SYLLABLE GGWEOH +AFF1..B00B ; LVT # Lo [27] HANGUL SYLLABLE GGWEG..HANGUL SYLLABLE GGWEH +B00D..B027 ; LVT # Lo [27] HANGUL SYLLABLE GGWIG..HANGUL SYLLABLE GGWIH +B029..B043 ; LVT # Lo [27] HANGUL SYLLABLE GGYUG..HANGUL SYLLABLE GGYUH +B045..B05F ; LVT # Lo [27] HANGUL SYLLABLE GGEUG..HANGUL SYLLABLE GGEUH +B061..B07B ; LVT # Lo [27] HANGUL SYLLABLE GGYIG..HANGUL SYLLABLE GGYIH +B07D..B097 ; LVT # Lo [27] HANGUL SYLLABLE GGIG..HANGUL SYLLABLE GGIH +B099..B0B3 ; LVT # Lo [27] HANGUL SYLLABLE NAG..HANGUL SYLLABLE NAH +B0B5..B0CF ; LVT # Lo [27] HANGUL SYLLABLE NAEG..HANGUL SYLLABLE NAEH +B0D1..B0EB ; LVT # Lo [27] HANGUL SYLLABLE NYAG..HANGUL SYLLABLE NYAH +B0ED..B107 ; LVT # Lo [27] HANGUL SYLLABLE NYAEG..HANGUL SYLLABLE NYAEH +B109..B123 ; LVT # Lo [27] HANGUL SYLLABLE NEOG..HANGUL SYLLABLE NEOH +B125..B13F ; LVT # Lo [27] HANGUL SYLLABLE NEG..HANGUL SYLLABLE NEH +B141..B15B ; LVT # Lo [27] HANGUL SYLLABLE NYEOG..HANGUL SYLLABLE NYEOH +B15D..B177 ; LVT # Lo [27] HANGUL SYLLABLE NYEG..HANGUL SYLLABLE NYEH +B179..B193 ; LVT # Lo [27] HANGUL SYLLABLE NOG..HANGUL SYLLABLE NOH +B195..B1AF ; LVT # Lo [27] HANGUL SYLLABLE NWAG..HANGUL SYLLABLE NWAH +B1B1..B1CB ; LVT # Lo [27] HANGUL SYLLABLE NWAEG..HANGUL SYLLABLE NWAEH +B1CD..B1E7 ; LVT # Lo [27] HANGUL SYLLABLE NOEG..HANGUL SYLLABLE NOEH +B1E9..B203 ; LVT # Lo [27] HANGUL SYLLABLE NYOG..HANGUL SYLLABLE NYOH +B205..B21F ; LVT # Lo [27] HANGUL SYLLABLE NUG..HANGUL SYLLABLE NUH +B221..B23B ; LVT # Lo [27] HANGUL SYLLABLE NWEOG..HANGUL SYLLABLE NWEOH +B23D..B257 ; LVT # Lo [27] HANGUL SYLLABLE NWEG..HANGUL SYLLABLE NWEH +B259..B273 ; LVT # Lo [27] HANGUL SYLLABLE NWIG..HANGUL SYLLABLE NWIH +B275..B28F ; LVT # Lo [27] HANGUL SYLLABLE NYUG..HANGUL SYLLABLE NYUH +B291..B2AB ; LVT # Lo [27] HANGUL SYLLABLE NEUG..HANGUL SYLLABLE NEUH +B2AD..B2C7 ; LVT # Lo [27] HANGUL SYLLABLE NYIG..HANGUL SYLLABLE NYIH +B2C9..B2E3 ; LVT 
# Lo [27] HANGUL SYLLABLE NIG..HANGUL SYLLABLE NIH +B2E5..B2FF ; LVT # Lo [27] HANGUL SYLLABLE DAG..HANGUL SYLLABLE DAH +B301..B31B ; LVT # Lo [27] HANGUL SYLLABLE DAEG..HANGUL SYLLABLE DAEH +B31D..B337 ; LVT # Lo [27] HANGUL SYLLABLE DYAG..HANGUL SYLLABLE DYAH +B339..B353 ; LVT # Lo [27] HANGUL SYLLABLE DYAEG..HANGUL SYLLABLE DYAEH +B355..B36F ; LVT # Lo [27] HANGUL SYLLABLE DEOG..HANGUL SYLLABLE DEOH +B371..B38B ; LVT # Lo [27] HANGUL SYLLABLE DEG..HANGUL SYLLABLE DEH +B38D..B3A7 ; LVT # Lo [27] HANGUL SYLLABLE DYEOG..HANGUL SYLLABLE DYEOH +B3A9..B3C3 ; LVT # Lo [27] HANGUL SYLLABLE DYEG..HANGUL SYLLABLE DYEH +B3C5..B3DF ; LVT # Lo [27] HANGUL SYLLABLE DOG..HANGUL SYLLABLE DOH +B3E1..B3FB ; LVT # Lo [27] HANGUL SYLLABLE DWAG..HANGUL SYLLABLE DWAH +B3FD..B417 ; LVT # Lo [27] HANGUL SYLLABLE DWAEG..HANGUL SYLLABLE DWAEH +B419..B433 ; LVT # Lo [27] HANGUL SYLLABLE DOEG..HANGUL SYLLABLE DOEH +B435..B44F ; LVT # Lo [27] HANGUL SYLLABLE DYOG..HANGUL SYLLABLE DYOH +B451..B46B ; LVT # Lo [27] HANGUL SYLLABLE DUG..HANGUL SYLLABLE DUH +B46D..B487 ; LVT # Lo [27] HANGUL SYLLABLE DWEOG..HANGUL SYLLABLE DWEOH +B489..B4A3 ; LVT # Lo [27] HANGUL SYLLABLE DWEG..HANGUL SYLLABLE DWEH +B4A5..B4BF ; LVT # Lo [27] HANGUL SYLLABLE DWIG..HANGUL SYLLABLE DWIH +B4C1..B4DB ; LVT # Lo [27] HANGUL SYLLABLE DYUG..HANGUL SYLLABLE DYUH +B4DD..B4F7 ; LVT # Lo [27] HANGUL SYLLABLE DEUG..HANGUL SYLLABLE DEUH +B4F9..B513 ; LVT # Lo [27] HANGUL SYLLABLE DYIG..HANGUL SYLLABLE DYIH +B515..B52F ; LVT # Lo [27] HANGUL SYLLABLE DIG..HANGUL SYLLABLE DIH +B531..B54B ; LVT # Lo [27] HANGUL SYLLABLE DDAG..HANGUL SYLLABLE DDAH +B54D..B567 ; LVT # Lo [27] HANGUL SYLLABLE DDAEG..HANGUL SYLLABLE DDAEH +B569..B583 ; LVT # Lo [27] HANGUL SYLLABLE DDYAG..HANGUL SYLLABLE DDYAH +B585..B59F ; LVT # Lo [27] HANGUL SYLLABLE DDYAEG..HANGUL SYLLABLE DDYAEH +B5A1..B5BB ; LVT # Lo [27] HANGUL SYLLABLE DDEOG..HANGUL SYLLABLE DDEOH +B5BD..B5D7 ; LVT # Lo [27] HANGUL SYLLABLE DDEG..HANGUL SYLLABLE DDEH +B5D9..B5F3 ; LVT # Lo 
[27] HANGUL SYLLABLE DDYEOG..HANGUL SYLLABLE DDYEOH +B5F5..B60F ; LVT # Lo [27] HANGUL SYLLABLE DDYEG..HANGUL SYLLABLE DDYEH +B611..B62B ; LVT # Lo [27] HANGUL SYLLABLE DDOG..HANGUL SYLLABLE DDOH +B62D..B647 ; LVT # Lo [27] HANGUL SYLLABLE DDWAG..HANGUL SYLLABLE DDWAH +B649..B663 ; LVT # Lo [27] HANGUL SYLLABLE DDWAEG..HANGUL SYLLABLE DDWAEH +B665..B67F ; LVT # Lo [27] HANGUL SYLLABLE DDOEG..HANGUL SYLLABLE DDOEH +B681..B69B ; LVT # Lo [27] HANGUL SYLLABLE DDYOG..HANGUL SYLLABLE DDYOH +B69D..B6B7 ; LVT # Lo [27] HANGUL SYLLABLE DDUG..HANGUL SYLLABLE DDUH +B6B9..B6D3 ; LVT # Lo [27] HANGUL SYLLABLE DDWEOG..HANGUL SYLLABLE DDWEOH +B6D5..B6EF ; LVT # Lo [27] HANGUL SYLLABLE DDWEG..HANGUL SYLLABLE DDWEH +B6F1..B70B ; LVT # Lo [27] HANGUL SYLLABLE DDWIG..HANGUL SYLLABLE DDWIH +B70D..B727 ; LVT # Lo [27] HANGUL SYLLABLE DDYUG..HANGUL SYLLABLE DDYUH +B729..B743 ; LVT # Lo [27] HANGUL SYLLABLE DDEUG..HANGUL SYLLABLE DDEUH +B745..B75F ; LVT # Lo [27] HANGUL SYLLABLE DDYIG..HANGUL SYLLABLE DDYIH +B761..B77B ; LVT # Lo [27] HANGUL SYLLABLE DDIG..HANGUL SYLLABLE DDIH +B77D..B797 ; LVT # Lo [27] HANGUL SYLLABLE RAG..HANGUL SYLLABLE RAH +B799..B7B3 ; LVT # Lo [27] HANGUL SYLLABLE RAEG..HANGUL SYLLABLE RAEH +B7B5..B7CF ; LVT # Lo [27] HANGUL SYLLABLE RYAG..HANGUL SYLLABLE RYAH +B7D1..B7EB ; LVT # Lo [27] HANGUL SYLLABLE RYAEG..HANGUL SYLLABLE RYAEH +B7ED..B807 ; LVT # Lo [27] HANGUL SYLLABLE REOG..HANGUL SYLLABLE REOH +B809..B823 ; LVT # Lo [27] HANGUL SYLLABLE REG..HANGUL SYLLABLE REH +B825..B83F ; LVT # Lo [27] HANGUL SYLLABLE RYEOG..HANGUL SYLLABLE RYEOH +B841..B85B ; LVT # Lo [27] HANGUL SYLLABLE RYEG..HANGUL SYLLABLE RYEH +B85D..B877 ; LVT # Lo [27] HANGUL SYLLABLE ROG..HANGUL SYLLABLE ROH +B879..B893 ; LVT # Lo [27] HANGUL SYLLABLE RWAG..HANGUL SYLLABLE RWAH +B895..B8AF ; LVT # Lo [27] HANGUL SYLLABLE RWAEG..HANGUL SYLLABLE RWAEH +B8B1..B8CB ; LVT # Lo [27] HANGUL SYLLABLE ROEG..HANGUL SYLLABLE ROEH +B8CD..B8E7 ; LVT # Lo [27] HANGUL SYLLABLE RYOG..HANGUL SYLLABLE RYOH 
+B8E9..B903 ; LVT # Lo [27] HANGUL SYLLABLE RUG..HANGUL SYLLABLE RUH +B905..B91F ; LVT # Lo [27] HANGUL SYLLABLE RWEOG..HANGUL SYLLABLE RWEOH +B921..B93B ; LVT # Lo [27] HANGUL SYLLABLE RWEG..HANGUL SYLLABLE RWEH +B93D..B957 ; LVT # Lo [27] HANGUL SYLLABLE RWIG..HANGUL SYLLABLE RWIH +B959..B973 ; LVT # Lo [27] HANGUL SYLLABLE RYUG..HANGUL SYLLABLE RYUH +B975..B98F ; LVT # Lo [27] HANGUL SYLLABLE REUG..HANGUL SYLLABLE REUH +B991..B9AB ; LVT # Lo [27] HANGUL SYLLABLE RYIG..HANGUL SYLLABLE RYIH +B9AD..B9C7 ; LVT # Lo [27] HANGUL SYLLABLE RIG..HANGUL SYLLABLE RIH +B9C9..B9E3 ; LVT # Lo [27] HANGUL SYLLABLE MAG..HANGUL SYLLABLE MAH +B9E5..B9FF ; LVT # Lo [27] HANGUL SYLLABLE MAEG..HANGUL SYLLABLE MAEH +BA01..BA1B ; LVT # Lo [27] HANGUL SYLLABLE MYAG..HANGUL SYLLABLE MYAH +BA1D..BA37 ; LVT # Lo [27] HANGUL SYLLABLE MYAEG..HANGUL SYLLABLE MYAEH +BA39..BA53 ; LVT # Lo [27] HANGUL SYLLABLE MEOG..HANGUL SYLLABLE MEOH +BA55..BA6F ; LVT # Lo [27] HANGUL SYLLABLE MEG..HANGUL SYLLABLE MEH +BA71..BA8B ; LVT # Lo [27] HANGUL SYLLABLE MYEOG..HANGUL SYLLABLE MYEOH +BA8D..BAA7 ; LVT # Lo [27] HANGUL SYLLABLE MYEG..HANGUL SYLLABLE MYEH +BAA9..BAC3 ; LVT # Lo [27] HANGUL SYLLABLE MOG..HANGUL SYLLABLE MOH +BAC5..BADF ; LVT # Lo [27] HANGUL SYLLABLE MWAG..HANGUL SYLLABLE MWAH +BAE1..BAFB ; LVT # Lo [27] HANGUL SYLLABLE MWAEG..HANGUL SYLLABLE MWAEH +BAFD..BB17 ; LVT # Lo [27] HANGUL SYLLABLE MOEG..HANGUL SYLLABLE MOEH +BB19..BB33 ; LVT # Lo [27] HANGUL SYLLABLE MYOG..HANGUL SYLLABLE MYOH +BB35..BB4F ; LVT # Lo [27] HANGUL SYLLABLE MUG..HANGUL SYLLABLE MUH +BB51..BB6B ; LVT # Lo [27] HANGUL SYLLABLE MWEOG..HANGUL SYLLABLE MWEOH +BB6D..BB87 ; LVT # Lo [27] HANGUL SYLLABLE MWEG..HANGUL SYLLABLE MWEH +BB89..BBA3 ; LVT # Lo [27] HANGUL SYLLABLE MWIG..HANGUL SYLLABLE MWIH +BBA5..BBBF ; LVT # Lo [27] HANGUL SYLLABLE MYUG..HANGUL SYLLABLE MYUH +BBC1..BBDB ; LVT # Lo [27] HANGUL SYLLABLE MEUG..HANGUL SYLLABLE MEUH +BBDD..BBF7 ; LVT # Lo [27] HANGUL SYLLABLE MYIG..HANGUL SYLLABLE MYIH +BBF9..BC13 ; 
LVT # Lo [27] HANGUL SYLLABLE MIG..HANGUL SYLLABLE MIH +BC15..BC2F ; LVT # Lo [27] HANGUL SYLLABLE BAG..HANGUL SYLLABLE BAH +BC31..BC4B ; LVT # Lo [27] HANGUL SYLLABLE BAEG..HANGUL SYLLABLE BAEH +BC4D..BC67 ; LVT # Lo [27] HANGUL SYLLABLE BYAG..HANGUL SYLLABLE BYAH +BC69..BC83 ; LVT # Lo [27] HANGUL SYLLABLE BYAEG..HANGUL SYLLABLE BYAEH +BC85..BC9F ; LVT # Lo [27] HANGUL SYLLABLE BEOG..HANGUL SYLLABLE BEOH +BCA1..BCBB ; LVT # Lo [27] HANGUL SYLLABLE BEG..HANGUL SYLLABLE BEH +BCBD..BCD7 ; LVT # Lo [27] HANGUL SYLLABLE BYEOG..HANGUL SYLLABLE BYEOH +BCD9..BCF3 ; LVT # Lo [27] HANGUL SYLLABLE BYEG..HANGUL SYLLABLE BYEH +BCF5..BD0F ; LVT # Lo [27] HANGUL SYLLABLE BOG..HANGUL SYLLABLE BOH +BD11..BD2B ; LVT # Lo [27] HANGUL SYLLABLE BWAG..HANGUL SYLLABLE BWAH +BD2D..BD47 ; LVT # Lo [27] HANGUL SYLLABLE BWAEG..HANGUL SYLLABLE BWAEH +BD49..BD63 ; LVT # Lo [27] HANGUL SYLLABLE BOEG..HANGUL SYLLABLE BOEH +BD65..BD7F ; LVT # Lo [27] HANGUL SYLLABLE BYOG..HANGUL SYLLABLE BYOH +BD81..BD9B ; LVT # Lo [27] HANGUL SYLLABLE BUG..HANGUL SYLLABLE BUH +BD9D..BDB7 ; LVT # Lo [27] HANGUL SYLLABLE BWEOG..HANGUL SYLLABLE BWEOH +BDB9..BDD3 ; LVT # Lo [27] HANGUL SYLLABLE BWEG..HANGUL SYLLABLE BWEH +BDD5..BDEF ; LVT # Lo [27] HANGUL SYLLABLE BWIG..HANGUL SYLLABLE BWIH +BDF1..BE0B ; LVT # Lo [27] HANGUL SYLLABLE BYUG..HANGUL SYLLABLE BYUH +BE0D..BE27 ; LVT # Lo [27] HANGUL SYLLABLE BEUG..HANGUL SYLLABLE BEUH +BE29..BE43 ; LVT # Lo [27] HANGUL SYLLABLE BYIG..HANGUL SYLLABLE BYIH +BE45..BE5F ; LVT # Lo [27] HANGUL SYLLABLE BIG..HANGUL SYLLABLE BIH +BE61..BE7B ; LVT # Lo [27] HANGUL SYLLABLE BBAG..HANGUL SYLLABLE BBAH +BE7D..BE97 ; LVT # Lo [27] HANGUL SYLLABLE BBAEG..HANGUL SYLLABLE BBAEH +BE99..BEB3 ; LVT # Lo [27] HANGUL SYLLABLE BBYAG..HANGUL SYLLABLE BBYAH +BEB5..BECF ; LVT # Lo [27] HANGUL SYLLABLE BBYAEG..HANGUL SYLLABLE BBYAEH +BED1..BEEB ; LVT # Lo [27] HANGUL SYLLABLE BBEOG..HANGUL SYLLABLE BBEOH +BEED..BF07 ; LVT # Lo [27] HANGUL SYLLABLE BBEG..HANGUL SYLLABLE BBEH +BF09..BF23 ; LVT # 
Lo [27] HANGUL SYLLABLE BBYEOG..HANGUL SYLLABLE BBYEOH +BF25..BF3F ; LVT # Lo [27] HANGUL SYLLABLE BBYEG..HANGUL SYLLABLE BBYEH +BF41..BF5B ; LVT # Lo [27] HANGUL SYLLABLE BBOG..HANGUL SYLLABLE BBOH +BF5D..BF77 ; LVT # Lo [27] HANGUL SYLLABLE BBWAG..HANGUL SYLLABLE BBWAH +BF79..BF93 ; LVT # Lo [27] HANGUL SYLLABLE BBWAEG..HANGUL SYLLABLE BBWAEH +BF95..BFAF ; LVT # Lo [27] HANGUL SYLLABLE BBOEG..HANGUL SYLLABLE BBOEH +BFB1..BFCB ; LVT # Lo [27] HANGUL SYLLABLE BBYOG..HANGUL SYLLABLE BBYOH +BFCD..BFE7 ; LVT # Lo [27] HANGUL SYLLABLE BBUG..HANGUL SYLLABLE BBUH +BFE9..C003 ; LVT # Lo [27] HANGUL SYLLABLE BBWEOG..HANGUL SYLLABLE BBWEOH +C005..C01F ; LVT # Lo [27] HANGUL SYLLABLE BBWEG..HANGUL SYLLABLE BBWEH +C021..C03B ; LVT # Lo [27] HANGUL SYLLABLE BBWIG..HANGUL SYLLABLE BBWIH +C03D..C057 ; LVT # Lo [27] HANGUL SYLLABLE BBYUG..HANGUL SYLLABLE BBYUH +C059..C073 ; LVT # Lo [27] HANGUL SYLLABLE BBEUG..HANGUL SYLLABLE BBEUH +C075..C08F ; LVT # Lo [27] HANGUL SYLLABLE BBYIG..HANGUL SYLLABLE BBYIH +C091..C0AB ; LVT # Lo [27] HANGUL SYLLABLE BBIG..HANGUL SYLLABLE BBIH +C0AD..C0C7 ; LVT # Lo [27] HANGUL SYLLABLE SAG..HANGUL SYLLABLE SAH +C0C9..C0E3 ; LVT # Lo [27] HANGUL SYLLABLE SAEG..HANGUL SYLLABLE SAEH +C0E5..C0FF ; LVT # Lo [27] HANGUL SYLLABLE SYAG..HANGUL SYLLABLE SYAH +C101..C11B ; LVT # Lo [27] HANGUL SYLLABLE SYAEG..HANGUL SYLLABLE SYAEH +C11D..C137 ; LVT # Lo [27] HANGUL SYLLABLE SEOG..HANGUL SYLLABLE SEOH +C139..C153 ; LVT # Lo [27] HANGUL SYLLABLE SEG..HANGUL SYLLABLE SEH +C155..C16F ; LVT # Lo [27] HANGUL SYLLABLE SYEOG..HANGUL SYLLABLE SYEOH +C171..C18B ; LVT # Lo [27] HANGUL SYLLABLE SYEG..HANGUL SYLLABLE SYEH +C18D..C1A7 ; LVT # Lo [27] HANGUL SYLLABLE SOG..HANGUL SYLLABLE SOH +C1A9..C1C3 ; LVT # Lo [27] HANGUL SYLLABLE SWAG..HANGUL SYLLABLE SWAH +C1C5..C1DF ; LVT # Lo [27] HANGUL SYLLABLE SWAEG..HANGUL SYLLABLE SWAEH +C1E1..C1FB ; LVT # Lo [27] HANGUL SYLLABLE SOEG..HANGUL SYLLABLE SOEH +C1FD..C217 ; LVT # Lo [27] HANGUL SYLLABLE SYOG..HANGUL SYLLABLE SYOH 
+C219..C233 ; LVT # Lo [27] HANGUL SYLLABLE SUG..HANGUL SYLLABLE SUH +C235..C24F ; LVT # Lo [27] HANGUL SYLLABLE SWEOG..HANGUL SYLLABLE SWEOH +C251..C26B ; LVT # Lo [27] HANGUL SYLLABLE SWEG..HANGUL SYLLABLE SWEH +C26D..C287 ; LVT # Lo [27] HANGUL SYLLABLE SWIG..HANGUL SYLLABLE SWIH +C289..C2A3 ; LVT # Lo [27] HANGUL SYLLABLE SYUG..HANGUL SYLLABLE SYUH +C2A5..C2BF ; LVT # Lo [27] HANGUL SYLLABLE SEUG..HANGUL SYLLABLE SEUH +C2C1..C2DB ; LVT # Lo [27] HANGUL SYLLABLE SYIG..HANGUL SYLLABLE SYIH +C2DD..C2F7 ; LVT # Lo [27] HANGUL SYLLABLE SIG..HANGUL SYLLABLE SIH +C2F9..C313 ; LVT # Lo [27] HANGUL SYLLABLE SSAG..HANGUL SYLLABLE SSAH +C315..C32F ; LVT # Lo [27] HANGUL SYLLABLE SSAEG..HANGUL SYLLABLE SSAEH +C331..C34B ; LVT # Lo [27] HANGUL SYLLABLE SSYAG..HANGUL SYLLABLE SSYAH +C34D..C367 ; LVT # Lo [27] HANGUL SYLLABLE SSYAEG..HANGUL SYLLABLE SSYAEH +C369..C383 ; LVT # Lo [27] HANGUL SYLLABLE SSEOG..HANGUL SYLLABLE SSEOH +C385..C39F ; LVT # Lo [27] HANGUL SYLLABLE SSEG..HANGUL SYLLABLE SSEH +C3A1..C3BB ; LVT # Lo [27] HANGUL SYLLABLE SSYEOG..HANGUL SYLLABLE SSYEOH +C3BD..C3D7 ; LVT # Lo [27] HANGUL SYLLABLE SSYEG..HANGUL SYLLABLE SSYEH +C3D9..C3F3 ; LVT # Lo [27] HANGUL SYLLABLE SSOG..HANGUL SYLLABLE SSOH +C3F5..C40F ; LVT # Lo [27] HANGUL SYLLABLE SSWAG..HANGUL SYLLABLE SSWAH +C411..C42B ; LVT # Lo [27] HANGUL SYLLABLE SSWAEG..HANGUL SYLLABLE SSWAEH +C42D..C447 ; LVT # Lo [27] HANGUL SYLLABLE SSOEG..HANGUL SYLLABLE SSOEH +C449..C463 ; LVT # Lo [27] HANGUL SYLLABLE SSYOG..HANGUL SYLLABLE SSYOH +C465..C47F ; LVT # Lo [27] HANGUL SYLLABLE SSUG..HANGUL SYLLABLE SSUH +C481..C49B ; LVT # Lo [27] HANGUL SYLLABLE SSWEOG..HANGUL SYLLABLE SSWEOH +C49D..C4B7 ; LVT # Lo [27] HANGUL SYLLABLE SSWEG..HANGUL SYLLABLE SSWEH +C4B9..C4D3 ; LVT # Lo [27] HANGUL SYLLABLE SSWIG..HANGUL SYLLABLE SSWIH +C4D5..C4EF ; LVT # Lo [27] HANGUL SYLLABLE SSYUG..HANGUL SYLLABLE SSYUH +C4F1..C50B ; LVT # Lo [27] HANGUL SYLLABLE SSEUG..HANGUL SYLLABLE SSEUH +C50D..C527 ; LVT # Lo [27] HANGUL SYLLABLE 
SSYIG..HANGUL SYLLABLE SSYIH +C529..C543 ; LVT # Lo [27] HANGUL SYLLABLE SSIG..HANGUL SYLLABLE SSIH +C545..C55F ; LVT # Lo [27] HANGUL SYLLABLE AG..HANGUL SYLLABLE AH +C561..C57B ; LVT # Lo [27] HANGUL SYLLABLE AEG..HANGUL SYLLABLE AEH +C57D..C597 ; LVT # Lo [27] HANGUL SYLLABLE YAG..HANGUL SYLLABLE YAH +C599..C5B3 ; LVT # Lo [27] HANGUL SYLLABLE YAEG..HANGUL SYLLABLE YAEH +C5B5..C5CF ; LVT # Lo [27] HANGUL SYLLABLE EOG..HANGUL SYLLABLE EOH +C5D1..C5EB ; LVT # Lo [27] HANGUL SYLLABLE EG..HANGUL SYLLABLE EH +C5ED..C607 ; LVT # Lo [27] HANGUL SYLLABLE YEOG..HANGUL SYLLABLE YEOH +C609..C623 ; LVT # Lo [27] HANGUL SYLLABLE YEG..HANGUL SYLLABLE YEH +C625..C63F ; LVT # Lo [27] HANGUL SYLLABLE OG..HANGUL SYLLABLE OH +C641..C65B ; LVT # Lo [27] HANGUL SYLLABLE WAG..HANGUL SYLLABLE WAH +C65D..C677 ; LVT # Lo [27] HANGUL SYLLABLE WAEG..HANGUL SYLLABLE WAEH +C679..C693 ; LVT # Lo [27] HANGUL SYLLABLE OEG..HANGUL SYLLABLE OEH +C695..C6AF ; LVT # Lo [27] HANGUL SYLLABLE YOG..HANGUL SYLLABLE YOH +C6B1..C6CB ; LVT # Lo [27] HANGUL SYLLABLE UG..HANGUL SYLLABLE UH +C6CD..C6E7 ; LVT # Lo [27] HANGUL SYLLABLE WEOG..HANGUL SYLLABLE WEOH +C6E9..C703 ; LVT # Lo [27] HANGUL SYLLABLE WEG..HANGUL SYLLABLE WEH +C705..C71F ; LVT # Lo [27] HANGUL SYLLABLE WIG..HANGUL SYLLABLE WIH +C721..C73B ; LVT # Lo [27] HANGUL SYLLABLE YUG..HANGUL SYLLABLE YUH +C73D..C757 ; LVT # Lo [27] HANGUL SYLLABLE EUG..HANGUL SYLLABLE EUH +C759..C773 ; LVT # Lo [27] HANGUL SYLLABLE YIG..HANGUL SYLLABLE YIH +C775..C78F ; LVT # Lo [27] HANGUL SYLLABLE IG..HANGUL SYLLABLE IH +C791..C7AB ; LVT # Lo [27] HANGUL SYLLABLE JAG..HANGUL SYLLABLE JAH +C7AD..C7C7 ; LVT # Lo [27] HANGUL SYLLABLE JAEG..HANGUL SYLLABLE JAEH +C7C9..C7E3 ; LVT # Lo [27] HANGUL SYLLABLE JYAG..HANGUL SYLLABLE JYAH +C7E5..C7FF ; LVT # Lo [27] HANGUL SYLLABLE JYAEG..HANGUL SYLLABLE JYAEH +C801..C81B ; LVT # Lo [27] HANGUL SYLLABLE JEOG..HANGUL SYLLABLE JEOH +C81D..C837 ; LVT # Lo [27] HANGUL SYLLABLE JEG..HANGUL SYLLABLE JEH +C839..C853 ; LVT # Lo [27] 
HANGUL SYLLABLE JYEOG..HANGUL SYLLABLE JYEOH +C855..C86F ; LVT # Lo [27] HANGUL SYLLABLE JYEG..HANGUL SYLLABLE JYEH +C871..C88B ; LVT # Lo [27] HANGUL SYLLABLE JOG..HANGUL SYLLABLE JOH +C88D..C8A7 ; LVT # Lo [27] HANGUL SYLLABLE JWAG..HANGUL SYLLABLE JWAH +C8A9..C8C3 ; LVT # Lo [27] HANGUL SYLLABLE JWAEG..HANGUL SYLLABLE JWAEH +C8C5..C8DF ; LVT # Lo [27] HANGUL SYLLABLE JOEG..HANGUL SYLLABLE JOEH +C8E1..C8FB ; LVT # Lo [27] HANGUL SYLLABLE JYOG..HANGUL SYLLABLE JYOH +C8FD..C917 ; LVT # Lo [27] HANGUL SYLLABLE JUG..HANGUL SYLLABLE JUH +C919..C933 ; LVT # Lo [27] HANGUL SYLLABLE JWEOG..HANGUL SYLLABLE JWEOH +C935..C94F ; LVT # Lo [27] HANGUL SYLLABLE JWEG..HANGUL SYLLABLE JWEH +C951..C96B ; LVT # Lo [27] HANGUL SYLLABLE JWIG..HANGUL SYLLABLE JWIH +C96D..C987 ; LVT # Lo [27] HANGUL SYLLABLE JYUG..HANGUL SYLLABLE JYUH +C989..C9A3 ; LVT # Lo [27] HANGUL SYLLABLE JEUG..HANGUL SYLLABLE JEUH +C9A5..C9BF ; LVT # Lo [27] HANGUL SYLLABLE JYIG..HANGUL SYLLABLE JYIH +C9C1..C9DB ; LVT # Lo [27] HANGUL SYLLABLE JIG..HANGUL SYLLABLE JIH +C9DD..C9F7 ; LVT # Lo [27] HANGUL SYLLABLE JJAG..HANGUL SYLLABLE JJAH +C9F9..CA13 ; LVT # Lo [27] HANGUL SYLLABLE JJAEG..HANGUL SYLLABLE JJAEH +CA15..CA2F ; LVT # Lo [27] HANGUL SYLLABLE JJYAG..HANGUL SYLLABLE JJYAH +CA31..CA4B ; LVT # Lo [27] HANGUL SYLLABLE JJYAEG..HANGUL SYLLABLE JJYAEH +CA4D..CA67 ; LVT # Lo [27] HANGUL SYLLABLE JJEOG..HANGUL SYLLABLE JJEOH +CA69..CA83 ; LVT # Lo [27] HANGUL SYLLABLE JJEG..HANGUL SYLLABLE JJEH +CA85..CA9F ; LVT # Lo [27] HANGUL SYLLABLE JJYEOG..HANGUL SYLLABLE JJYEOH +CAA1..CABB ; LVT # Lo [27] HANGUL SYLLABLE JJYEG..HANGUL SYLLABLE JJYEH +CABD..CAD7 ; LVT # Lo [27] HANGUL SYLLABLE JJOG..HANGUL SYLLABLE JJOH +CAD9..CAF3 ; LVT # Lo [27] HANGUL SYLLABLE JJWAG..HANGUL SYLLABLE JJWAH +CAF5..CB0F ; LVT # Lo [27] HANGUL SYLLABLE JJWAEG..HANGUL SYLLABLE JJWAEH +CB11..CB2B ; LVT # Lo [27] HANGUL SYLLABLE JJOEG..HANGUL SYLLABLE JJOEH +CB2D..CB47 ; LVT # Lo [27] HANGUL SYLLABLE JJYOG..HANGUL SYLLABLE JJYOH +CB49..CB63 ; 
LVT # Lo [27] HANGUL SYLLABLE JJUG..HANGUL SYLLABLE JJUH +CB65..CB7F ; LVT # Lo [27] HANGUL SYLLABLE JJWEOG..HANGUL SYLLABLE JJWEOH +CB81..CB9B ; LVT # Lo [27] HANGUL SYLLABLE JJWEG..HANGUL SYLLABLE JJWEH +CB9D..CBB7 ; LVT # Lo [27] HANGUL SYLLABLE JJWIG..HANGUL SYLLABLE JJWIH +CBB9..CBD3 ; LVT # Lo [27] HANGUL SYLLABLE JJYUG..HANGUL SYLLABLE JJYUH +CBD5..CBEF ; LVT # Lo [27] HANGUL SYLLABLE JJEUG..HANGUL SYLLABLE JJEUH +CBF1..CC0B ; LVT # Lo [27] HANGUL SYLLABLE JJYIG..HANGUL SYLLABLE JJYIH +CC0D..CC27 ; LVT # Lo [27] HANGUL SYLLABLE JJIG..HANGUL SYLLABLE JJIH +CC29..CC43 ; LVT # Lo [27] HANGUL SYLLABLE CAG..HANGUL SYLLABLE CAH +CC45..CC5F ; LVT # Lo [27] HANGUL SYLLABLE CAEG..HANGUL SYLLABLE CAEH +CC61..CC7B ; LVT # Lo [27] HANGUL SYLLABLE CYAG..HANGUL SYLLABLE CYAH +CC7D..CC97 ; LVT # Lo [27] HANGUL SYLLABLE CYAEG..HANGUL SYLLABLE CYAEH +CC99..CCB3 ; LVT # Lo [27] HANGUL SYLLABLE CEOG..HANGUL SYLLABLE CEOH +CCB5..CCCF ; LVT # Lo [27] HANGUL SYLLABLE CEG..HANGUL SYLLABLE CEH +CCD1..CCEB ; LVT # Lo [27] HANGUL SYLLABLE CYEOG..HANGUL SYLLABLE CYEOH +CCED..CD07 ; LVT # Lo [27] HANGUL SYLLABLE CYEG..HANGUL SYLLABLE CYEH +CD09..CD23 ; LVT # Lo [27] HANGUL SYLLABLE COG..HANGUL SYLLABLE COH +CD25..CD3F ; LVT # Lo [27] HANGUL SYLLABLE CWAG..HANGUL SYLLABLE CWAH +CD41..CD5B ; LVT # Lo [27] HANGUL SYLLABLE CWAEG..HANGUL SYLLABLE CWAEH +CD5D..CD77 ; LVT # Lo [27] HANGUL SYLLABLE COEG..HANGUL SYLLABLE COEH +CD79..CD93 ; LVT # Lo [27] HANGUL SYLLABLE CYOG..HANGUL SYLLABLE CYOH +CD95..CDAF ; LVT # Lo [27] HANGUL SYLLABLE CUG..HANGUL SYLLABLE CUH +CDB1..CDCB ; LVT # Lo [27] HANGUL SYLLABLE CWEOG..HANGUL SYLLABLE CWEOH +CDCD..CDE7 ; LVT # Lo [27] HANGUL SYLLABLE CWEG..HANGUL SYLLABLE CWEH +CDE9..CE03 ; LVT # Lo [27] HANGUL SYLLABLE CWIG..HANGUL SYLLABLE CWIH +CE05..CE1F ; LVT # Lo [27] HANGUL SYLLABLE CYUG..HANGUL SYLLABLE CYUH +CE21..CE3B ; LVT # Lo [27] HANGUL SYLLABLE CEUG..HANGUL SYLLABLE CEUH +CE3D..CE57 ; LVT # Lo [27] HANGUL SYLLABLE CYIG..HANGUL SYLLABLE CYIH +CE59..CE73 
; LVT # Lo [27] HANGUL SYLLABLE CIG..HANGUL SYLLABLE CIH +CE75..CE8F ; LVT # Lo [27] HANGUL SYLLABLE KAG..HANGUL SYLLABLE KAH +CE91..CEAB ; LVT # Lo [27] HANGUL SYLLABLE KAEG..HANGUL SYLLABLE KAEH +CEAD..CEC7 ; LVT # Lo [27] HANGUL SYLLABLE KYAG..HANGUL SYLLABLE KYAH +CEC9..CEE3 ; LVT # Lo [27] HANGUL SYLLABLE KYAEG..HANGUL SYLLABLE KYAEH +CEE5..CEFF ; LVT # Lo [27] HANGUL SYLLABLE KEOG..HANGUL SYLLABLE KEOH +CF01..CF1B ; LVT # Lo [27] HANGUL SYLLABLE KEG..HANGUL SYLLABLE KEH +CF1D..CF37 ; LVT # Lo [27] HANGUL SYLLABLE KYEOG..HANGUL SYLLABLE KYEOH +CF39..CF53 ; LVT # Lo [27] HANGUL SYLLABLE KYEG..HANGUL SYLLABLE KYEH +CF55..CF6F ; LVT # Lo [27] HANGUL SYLLABLE KOG..HANGUL SYLLABLE KOH +CF71..CF8B ; LVT # Lo [27] HANGUL SYLLABLE KWAG..HANGUL SYLLABLE KWAH +CF8D..CFA7 ; LVT # Lo [27] HANGUL SYLLABLE KWAEG..HANGUL SYLLABLE KWAEH +CFA9..CFC3 ; LVT # Lo [27] HANGUL SYLLABLE KOEG..HANGUL SYLLABLE KOEH +CFC5..CFDF ; LVT # Lo [27] HANGUL SYLLABLE KYOG..HANGUL SYLLABLE KYOH +CFE1..CFFB ; LVT # Lo [27] HANGUL SYLLABLE KUG..HANGUL SYLLABLE KUH +CFFD..D017 ; LVT # Lo [27] HANGUL SYLLABLE KWEOG..HANGUL SYLLABLE KWEOH +D019..D033 ; LVT # Lo [27] HANGUL SYLLABLE KWEG..HANGUL SYLLABLE KWEH +D035..D04F ; LVT # Lo [27] HANGUL SYLLABLE KWIG..HANGUL SYLLABLE KWIH +D051..D06B ; LVT # Lo [27] HANGUL SYLLABLE KYUG..HANGUL SYLLABLE KYUH +D06D..D087 ; LVT # Lo [27] HANGUL SYLLABLE KEUG..HANGUL SYLLABLE KEUH +D089..D0A3 ; LVT # Lo [27] HANGUL SYLLABLE KYIG..HANGUL SYLLABLE KYIH +D0A5..D0BF ; LVT # Lo [27] HANGUL SYLLABLE KIG..HANGUL SYLLABLE KIH +D0C1..D0DB ; LVT # Lo [27] HANGUL SYLLABLE TAG..HANGUL SYLLABLE TAH +D0DD..D0F7 ; LVT # Lo [27] HANGUL SYLLABLE TAEG..HANGUL SYLLABLE TAEH +D0F9..D113 ; LVT # Lo [27] HANGUL SYLLABLE TYAG..HANGUL SYLLABLE TYAH +D115..D12F ; LVT # Lo [27] HANGUL SYLLABLE TYAEG..HANGUL SYLLABLE TYAEH +D131..D14B ; LVT # Lo [27] HANGUL SYLLABLE TEOG..HANGUL SYLLABLE TEOH +D14D..D167 ; LVT # Lo [27] HANGUL SYLLABLE TEG..HANGUL SYLLABLE TEH +D169..D183 ; LVT # Lo [27] 
HANGUL SYLLABLE TYEOG..HANGUL SYLLABLE TYEOH +D185..D19F ; LVT # Lo [27] HANGUL SYLLABLE TYEG..HANGUL SYLLABLE TYEH +D1A1..D1BB ; LVT # Lo [27] HANGUL SYLLABLE TOG..HANGUL SYLLABLE TOH +D1BD..D1D7 ; LVT # Lo [27] HANGUL SYLLABLE TWAG..HANGUL SYLLABLE TWAH +D1D9..D1F3 ; LVT # Lo [27] HANGUL SYLLABLE TWAEG..HANGUL SYLLABLE TWAEH +D1F5..D20F ; LVT # Lo [27] HANGUL SYLLABLE TOEG..HANGUL SYLLABLE TOEH +D211..D22B ; LVT # Lo [27] HANGUL SYLLABLE TYOG..HANGUL SYLLABLE TYOH +D22D..D247 ; LVT # Lo [27] HANGUL SYLLABLE TUG..HANGUL SYLLABLE TUH +D249..D263 ; LVT # Lo [27] HANGUL SYLLABLE TWEOG..HANGUL SYLLABLE TWEOH +D265..D27F ; LVT # Lo [27] HANGUL SYLLABLE TWEG..HANGUL SYLLABLE TWEH +D281..D29B ; LVT # Lo [27] HANGUL SYLLABLE TWIG..HANGUL SYLLABLE TWIH +D29D..D2B7 ; LVT # Lo [27] HANGUL SYLLABLE TYUG..HANGUL SYLLABLE TYUH +D2B9..D2D3 ; LVT # Lo [27] HANGUL SYLLABLE TEUG..HANGUL SYLLABLE TEUH +D2D5..D2EF ; LVT # Lo [27] HANGUL SYLLABLE TYIG..HANGUL SYLLABLE TYIH +D2F1..D30B ; LVT # Lo [27] HANGUL SYLLABLE TIG..HANGUL SYLLABLE TIH +D30D..D327 ; LVT # Lo [27] HANGUL SYLLABLE PAG..HANGUL SYLLABLE PAH +D329..D343 ; LVT # Lo [27] HANGUL SYLLABLE PAEG..HANGUL SYLLABLE PAEH +D345..D35F ; LVT # Lo [27] HANGUL SYLLABLE PYAG..HANGUL SYLLABLE PYAH +D361..D37B ; LVT # Lo [27] HANGUL SYLLABLE PYAEG..HANGUL SYLLABLE PYAEH +D37D..D397 ; LVT # Lo [27] HANGUL SYLLABLE PEOG..HANGUL SYLLABLE PEOH +D399..D3B3 ; LVT # Lo [27] HANGUL SYLLABLE PEG..HANGUL SYLLABLE PEH +D3B5..D3CF ; LVT # Lo [27] HANGUL SYLLABLE PYEOG..HANGUL SYLLABLE PYEOH +D3D1..D3EB ; LVT # Lo [27] HANGUL SYLLABLE PYEG..HANGUL SYLLABLE PYEH +D3ED..D407 ; LVT # Lo [27] HANGUL SYLLABLE POG..HANGUL SYLLABLE POH +D409..D423 ; LVT # Lo [27] HANGUL SYLLABLE PWAG..HANGUL SYLLABLE PWAH +D425..D43F ; LVT # Lo [27] HANGUL SYLLABLE PWAEG..HANGUL SYLLABLE PWAEH +D441..D45B ; LVT # Lo [27] HANGUL SYLLABLE POEG..HANGUL SYLLABLE POEH +D45D..D477 ; LVT # Lo [27] HANGUL SYLLABLE PYOG..HANGUL SYLLABLE PYOH +D479..D493 ; LVT # Lo [27] HANGUL 
SYLLABLE PUG..HANGUL SYLLABLE PUH +D495..D4AF ; LVT # Lo [27] HANGUL SYLLABLE PWEOG..HANGUL SYLLABLE PWEOH +D4B1..D4CB ; LVT # Lo [27] HANGUL SYLLABLE PWEG..HANGUL SYLLABLE PWEH +D4CD..D4E7 ; LVT # Lo [27] HANGUL SYLLABLE PWIG..HANGUL SYLLABLE PWIH +D4E9..D503 ; LVT # Lo [27] HANGUL SYLLABLE PYUG..HANGUL SYLLABLE PYUH +D505..D51F ; LVT # Lo [27] HANGUL SYLLABLE PEUG..HANGUL SYLLABLE PEUH +D521..D53B ; LVT # Lo [27] HANGUL SYLLABLE PYIG..HANGUL SYLLABLE PYIH +D53D..D557 ; LVT # Lo [27] HANGUL SYLLABLE PIG..HANGUL SYLLABLE PIH +D559..D573 ; LVT # Lo [27] HANGUL SYLLABLE HAG..HANGUL SYLLABLE HAH +D575..D58F ; LVT # Lo [27] HANGUL SYLLABLE HAEG..HANGUL SYLLABLE HAEH +D591..D5AB ; LVT # Lo [27] HANGUL SYLLABLE HYAG..HANGUL SYLLABLE HYAH +D5AD..D5C7 ; LVT # Lo [27] HANGUL SYLLABLE HYAEG..HANGUL SYLLABLE HYAEH +D5C9..D5E3 ; LVT # Lo [27] HANGUL SYLLABLE HEOG..HANGUL SYLLABLE HEOH +D5E5..D5FF ; LVT # Lo [27] HANGUL SYLLABLE HEG..HANGUL SYLLABLE HEH +D601..D61B ; LVT # Lo [27] HANGUL SYLLABLE HYEOG..HANGUL SYLLABLE HYEOH +D61D..D637 ; LVT # Lo [27] HANGUL SYLLABLE HYEG..HANGUL SYLLABLE HYEH +D639..D653 ; LVT # Lo [27] HANGUL SYLLABLE HOG..HANGUL SYLLABLE HOH +D655..D66F ; LVT # Lo [27] HANGUL SYLLABLE HWAG..HANGUL SYLLABLE HWAH +D671..D68B ; LVT # Lo [27] HANGUL SYLLABLE HWAEG..HANGUL SYLLABLE HWAEH +D68D..D6A7 ; LVT # Lo [27] HANGUL SYLLABLE HOEG..HANGUL SYLLABLE HOEH +D6A9..D6C3 ; LVT # Lo [27] HANGUL SYLLABLE HYOG..HANGUL SYLLABLE HYOH +D6C5..D6DF ; LVT # Lo [27] HANGUL SYLLABLE HUG..HANGUL SYLLABLE HUH +D6E1..D6FB ; LVT # Lo [27] HANGUL SYLLABLE HWEOG..HANGUL SYLLABLE HWEOH +D6FD..D717 ; LVT # Lo [27] HANGUL SYLLABLE HWEG..HANGUL SYLLABLE HWEH +D719..D733 ; LVT # Lo [27] HANGUL SYLLABLE HWIG..HANGUL SYLLABLE HWIH +D735..D74F ; LVT # Lo [27] HANGUL SYLLABLE HYUG..HANGUL SYLLABLE HYUH +D751..D76B ; LVT # Lo [27] HANGUL SYLLABLE HEUG..HANGUL SYLLABLE HEUH +D76D..D787 ; LVT # Lo [27] HANGUL SYLLABLE HYIG..HANGUL SYLLABLE HYIH +D789..D7A3 ; LVT # Lo [27] HANGUL SYLLABLE 
HIG..HANGUL SYLLABLE HIH + +# Total code points: 10773 + +# ================================================ + +200D ; ZWJ # Cf ZERO WIDTH JOINER + +# Total code points: 1 + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/PropList.txt b/3rd/pcre2/maint/Unicode.tables/PropList.txt new file mode 100644 index 00000000..fae2831e --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/PropList.txt @@ -0,0 +1,1928 @@ +# PropList-16.0.0.txt +# Date: 2024-05-31, 18:09:48 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ + +# ================================================ + +0009..000D ; White_Space # Cc [5] .. +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 25 + +# ================================================ + +061C ; Bidi_Control # Cf ARABIC LETTER MARK +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE + +# Total code points: 12 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +05BE ; Dash # Pd HEBREW 
PUNCTUATION MAQAF +1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Dash # Pd DOUBLE HYPHEN +2E5D ; Dash # Pd OBLIQUE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS +10D6E ; Dash # Pd GARAY HYPHEN +10EAD ; Dash # Pd YEZIDI HYPHENATION MARK + +# Total code points: 31 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT 
DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 30 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061D..061F ; Terminal_Punctuation # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION 
MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK +0830..0835 ; Terminal_Punctuation # Po [6] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION SHIYYAALAA +0837..083E ; Terminal_Punctuation # Po [8] SAMARITAN PUNCTUATION MELODIC QITSA..SAMARITAN PUNCTUATION ANNAAU +085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Terminal_Punctuation # Po CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B4E..1B4F ; Terminal_Punctuation # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN +1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE 
PAMADA +1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN +1B7D..1B7F ; Terminal_Punctuation # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK +1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +2024 ; Terminal_Punctuation # Po ONE DOT LEADER +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; Terminal_Punctuation # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK +2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK +2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP +2E41 ; Terminal_Punctuation # Po REVERSED COMMA +2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA +2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER +2E53..2E54 ; Terminal_Punctuation # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK +A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK +A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA +A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI +AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK 
CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE12 ; Terminal_Punctuation # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; Terminal_Punctuation # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA +1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER +103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER +10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN +1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR +10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS +10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10F55..10F59 ; Terminal_Punctuation # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Terminal_Punctuation # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI 
DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK +111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK +112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +113D4..113D5 ; Terminal_Punctuation # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA +1145A..1145B ; Terminal_Punctuation # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR +115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11944 ; Terminal_Punctuation # Po DIVES AKURU DOUBLE DANDA +11946 ; Terminal_Punctuation # Po DIVES AKURU END OF TEXT MARK +11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 +11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR +11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD +11EF7..11EF8 ; Terminal_Punctuation # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F44 ; Terminal_Punctuation # Po [2] KAWI DANDA..KAWI DOUBLE DANDA +12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +16A6E..16A6F ; Terminal_Punctuation # Po 
[2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP +16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM +16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +16D6E..16D6F ; Terminal_Punctuation # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA +16E97..16E98 ; Terminal_Punctuation # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP +1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON + +# Total code points: 291 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Other_Math # L& EULER CONSTANT +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& 
[5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW +21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +2308 ; Other_Math # Ps LEFT CEILING +2309 ; Other_Math # Pe RIGHT CEILING +230A ; Other_Math # Ps LEFT FLOOR +230B ; Other_Math # Pe RIGHT FLOOR +23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Other_Math # So 
RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +23E2 ; Other_Math # So WHITE TRAPEZIUM +25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR +2640 ; Other_Math # So FEMALE SIGN +2642 ; Other_Math # So MALE SIGN +2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE 
BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K 
+1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math 
# L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC 
MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1362 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# 
Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +0363..036F ; Other_Alphabetic # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X +05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; 
Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +0897 ; Other_Alphabetic # Mn ARABIC PEPET +08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA +08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic 
# Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN 
VOCALIC LL +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL 
SIGN VOCALIC LL +0CF3 ; Other_Alphabetic # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Other_Alphabetic # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO 
+0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F83 ; Other_Alphabetic # Mn [4] TIBETAN VOWEL SIGN REVERSED I..TIBETAN SIGN SNA LDAN +0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062..1064 ; Other_Alphabetic # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1067..106D ; Other_Alphabetic # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Other_Alphabetic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Other_Alphabetic # Mn MYANMAR SIGN SHAN 
COUNCIL EMPHATIC TONE +108F ; Other_Alphabetic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Other_Alphabetic # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL 
LA +1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1ABF..1AC0 ; Other_Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW +1ACC..1ACE ; Other_Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH +1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE 
VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN +1DD3..1DF4 ; Other_Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A802 ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN DVISVARA +A80B ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN ANUSVARA +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN 
HAARU..SAURASHTRA VOWEL SIGN AU +A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU +A8FF ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN AY +A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H +A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Other_Alphabetic # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9BF ; Other_Alphabetic # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +A9E5 ; Other_Alphabetic # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AA7B ; Other_Alphabetic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Other_Alphabetic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Other_Alphabetic # Mc MYANMAR SIGN TAI LAING TONE-5 +AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET 
VOWEL IA +AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E +10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY +11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA +11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11073..11074 ; Other_Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11080..11081 ; Other_Alphabetic # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN 
ANUSVARA +11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110C2 ; Other_Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11145..11146 ; Other_Alphabetic # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +111CE ; Other_Alphabetic # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; Other_Alphabetic # Mn SHARADA SIGN INVERTED CANDRABINDU +1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA +1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN +11241 ; Other_Alphabetic # Mn KHOJKI VOWEL SIGN VOCALIC R +112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Other_Alphabetic # Mn [2] 
GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Other_Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Other_Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Other_Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Other_Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA +11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA +11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA +114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU 
+114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA +115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA +115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA +11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +1171D ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Other_Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +1182C..1182E ; Other_Alphabetic # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Other_Alphabetic # Mn [9] DOGRA VOWEL SIGN U..DOGRA 
SIGN ANUSVARA +11838 ; Other_Alphabetic # Mc DOGRA SIGN VISARGA +11930..11935 ; Other_Alphabetic # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Other_Alphabetic # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; Other_Alphabetic # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +11940 ; Other_Alphabetic # Mc DIVES AKURU MEDIAL YA +11942 ; Other_Alphabetic # Mc DIVES AKURU MEDIAL RA +119D1..119D3 ; Other_Alphabetic # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Other_Alphabetic # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Other_Alphabetic # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Other_Alphabetic # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E4 ; Other_Alphabetic # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A01..11A0A ; Other_Alphabetic # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA +11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; 
Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA +11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA +11D8A..11D8E ; Other_Alphabetic # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Other_Alphabetic # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Other_Alphabetic # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Other_Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Other_Alphabetic # Mc GUNJALA GONDI SIGN VISARGA +11EF3..11EF4 ; Other_Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Other_Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11F00..11F01 ; Other_Alphabetic # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F03 ; Other_Alphabetic # Mc KAWI SIGN VISARGA +11F34..11F35 ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; Other_Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN 
E..KAWI VOWEL SIGN AI +11F40 ; Other_Alphabetic # Mn KAWI VOWEL SIGN EU +1611E..16129 ; Other_Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Other_Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Other_Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA +16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FF0..16FF1 ; Other_Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK +1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E08F ; Other_Alphabetic # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E947 ; Other_Alphabetic # Mn ADLAM HAMZA +1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1495 + +# ================================================ + +3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl 
IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DBF ; Ideographic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..9FFF ; Ideographic # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER +17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 +1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Ideographic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 106477 + +# ================================================ + +005E ; Diacritic # Sk 
CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Diacritic # Lm MODIFIER LETTER VOICING +02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374 ; Diacritic # Lm GREEK NUMERAL SIGN +0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT 
+05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +0898..089F ; Diacritic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08C9 ; Diacritic # Lm ARABIC SMALL FARSI YEH +08CA..08D2 ; Diacritic # Mn [9] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT INSIDE CIRCLE BELOW +08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C3C ; Diacritic # Mn TELUGU SIGN NUKTA 
+0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E3A ; Diacritic # Mn THAI CHARACTER PHINTHU +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EBA ; Diacritic # Mn LAO SIGN PALI VIRAMA +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1063..1064 ; Diacritic # Mc [2] MYANMAR TONE MARK SGAW KAREN HATHI..MYANMAR TONE MARK SGAW KAREN KE PHO +1069..106D ; Diacritic # Mc [5] MYANMAR SIGN WESTERN PWO KAREN TONE-1..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 +135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA +1715 ; Diacritic # Mc 
TAGALOG SIGN PAMUDPOD +1734 ; Diacritic # Mc HANUNOO SIGN PAMUDPOD +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A60 ; Diacritic # Mn TAI THAM SIGN SAKOT +1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY +1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT +1B34 ; Diacritic # Mn BALINESE SIGN REREKAN +1B44 ; Diacritic # Mc BALINESE ADEG ADEG +1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1BE6 ; Diacritic # Mn BATAK SIGN TOMPI +1BF2..1BF3 ; Diacritic # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Diacritic # Mn VEDIC SIGN TIRYAK +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1DC4..1DCF ; Diacritic # Mn [12] COMBINING 
MACRON-ACUTE..COMBINING ZIGZAG BELOW +1DF5..1DFF ; Diacritic # Mn [11] COMBINING UP TACK ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2E2F ; Diacritic # Lm VERTICAL TILDE +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET +A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67F ; Diacritic # Lm CYRILLIC PAYEROK +A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A700..A716 ; Diacritic # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER 
COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A806 ; Diacritic # Mn SYLOTI NAGRI SIGN HASANTA +A82C ; Diacritic # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU +A92E ; Diacritic # Po KAYAH LI SIGN CWI +A953 ; Diacritic # Mc REJANG VIRAMA +A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU +A9C0 ; Diacritic # Mc JAVANESE PANGKON +A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW +AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 +AABF ; Diacritic # Mn TAI VIET TONE MAI EK +AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG +AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO +AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA +AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Diacritic # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; Diacritic # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK +ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK 
+10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10A38..10A3A ; Diacritic # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Diacritic # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11046 ; Diacritic # Mn BRAHMI VIRAMA +11070 ; Diacritic # Mn BRAHMI SIGN OLD TAMIL VIRAMA +110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK +11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA +11236 ; Diacritic # Mn KHOJKI SIGN NUKTA +112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA +1133B..1133C ; Diacritic # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA +11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA 
LETTER A..COMBINING GRANTHA LETTER PA +113CE ; Diacritic # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Diacritic # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Diacritic # Mn TULU-TIGALARI CONJOINER +113D2 ; Diacritic # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; Diacritic # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; Diacritic # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA +11442 ; Diacritic # Mn NEWA SIGN VIRAMA +11446 ; Diacritic # Mn NEWA SIGN NUKTA +114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +1163F ; Diacritic # Mn MODI SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +1172B ; Diacritic # Mn AHOM SIGN KILLER +11839..1183A ; Diacritic # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193D ; Diacritic # Mc DIVES AKURU SIGN HALANTA +1193E ; Diacritic # Mn DIVES AKURU VIRAMA +11943 ; Diacritic # Mn DIVES AKURU SIGN NUKTA +119E0 ; Diacritic # Mn NANDINAGARI SIGN VIRAMA +11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Diacritic # Mn SOYOMBO SUBJOINER +11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA +11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA +11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA +11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11F41 ; Diacritic # Mc KAWI SIGN KILLER +11F42 ; Diacritic # Mn KAWI CONJOINER +11F5A ; Diacritic # Mn KAWI SIGN NUKTA +13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1612F ; Diacritic # Mn GURUNG KHEMA SIGN THOLHOMA +16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D6B..16D6C ; Diacritic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT +16F8F..16F92 
; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1AFF0..1AFF3 ; Diacritic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Diacritic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Diacritic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1CF00..1CF2D ; Diacritic # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Diacritic # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E030..1E06D ; Diacritic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E5EE..1E5EF ; Diacritic # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR +1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK +1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT 
MODIFIER..ADLAM NUKTA + +# Total code points: 1178 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +07FA ; Extender # Lm NKO LAJANYALAN +0A71 ; Extender # Mn GURMUKHI ADDAK +0AFB ; Extender # Mn GUJARATI SIGN SHADDA +0B55 ; Extender # Mn ORIYA SIGN OVERLINE +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK +1C36 ; Extender # Mn LEPCHA SIGN RAN +1C7B ; Extender # Lm OL CHIKI RELAA +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +A60C ; Extender # Lm VAI SYLLABLE LENGTHENER +A9CF ; Extender # Lm JAVANESE PANGRANGKEP +A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON +10D4E ; Extender # Lm GARAY VOWEL LENGTH MARK +10D6A ; Extender # Mn GARAY CONSONANT GEMINATION MARK +10D6F ; Extender # Lm GARAY REDUPLICATION MARK +11237 ; Extender # Mn KHOJKI SIGN SHADDA +1135D ; Extender # Lo GRANTHA SIGN PLUTA +113D2 ; Extender # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; Extender # Lo 
TULU-TIGALARI SIGN PLUTA +115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +11A98 ; Extender # Mn SOYOMBO GEMINATION MARK +16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM +16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK +1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E5EF ; Extender # Mn OL ONAL SIGN IKIR +1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK + +# Total code points: 59 + +# ================================================ + +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +10FC ; Other_Lowercase # Lm MODIFIER LETTER GEORGIAN NAR +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C7C..2C7D ; Other_Lowercase # Lm [2] 
LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F2..A7F4 ; Other_Lowercase # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Other_Lowercase # Lm MODIFIER LETTER SMALL TURNED W +10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1E030..1E06D ; Other_Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE + +# Total code points: 311 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 120 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] .. +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] .. +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] .. 
+2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] .. +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] .. +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] .. +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] .. +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] .. +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] .. +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] .. +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] .. +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] .. +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] .. +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] .. +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] .. +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] .. +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .. +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] .. + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC0 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN II +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CC7..0CC8 ; Other_Grapheme_Extend # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Grapheme_Extend # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +1715 ; Other_Grapheme_Extend # Mc TAGALOG SIGN PAMUDPOD +1734 ; Other_Grapheme_Extend # Mc HANUNOO SIGN PAMUDPOD +1B35 ; Other_Grapheme_Extend # Mc BALINESE VOWEL 
SIGN TEDUNG +1B3B ; Other_Grapheme_Extend # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D ; Other_Grapheme_Extend # Mc BALINESE VOWEL SIGN LA LENGA TEDUNG +1B43..1B44 ; Other_Grapheme_Extend # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1BAA ; Other_Grapheme_Extend # Mc SUNDANESE SIGN PAMAAEH +1BF2..1BF3 ; Other_Grapheme_Extend # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER +302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +A953 ; Other_Grapheme_Extend # Mc REJANG VIRAMA +A9C0 ; Other_Grapheme_Extend # Mc JAVANESE PANGKON +FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +111C0 ; Other_Grapheme_Extend # Mc SHARADA SIGN VIRAMA +11235 ; Other_Grapheme_Extend # Mc KHOJKI SIGN VIRAMA +1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +1134D ; Other_Grapheme_Extend # Mc GRANTHA SIGN VIRAMA +11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +113B8 ; Other_Grapheme_Extend # Mc TULU-TIGALARI VOWEL SIGN AA +113C2 ; Other_Grapheme_Extend # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Other_Grapheme_Extend # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113C9 ; Other_Grapheme_Extend # Mc [3] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI AU LENGTH MARK +113CF ; Other_Grapheme_Extend # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +116B6 ; Other_Grapheme_Extend # Mc TAKRI SIGN VIRAMA +11930 ; Other_Grapheme_Extend # Mc DIVES AKURU VOWEL SIGN AA +1193D ; Other_Grapheme_Extend # Mc DIVES AKURU SIGN HALANTA +11F41 ; Other_Grapheme_Extend # Mc KAWI SIGN KILLER +16FF0..16FF1 ; Other_Grapheme_Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1D165..1D166 ; Other_Grapheme_Extend # Mc 
[2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16D..1D172 ; Other_Grapheme_Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 160 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFD ; IDS_Binary_Operator # So [10] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT +31EF ; IDS_Binary_Operator # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION + +# Total code points: 13 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2FFE..2FFF ; IDS_Unary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DBF ; Unified_Ideograph # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..9FFF ; Unified_Ideograph # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo 
[2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6DF ; Unified_Ideograph # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B739 ; Unified_Ideograph # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +31350..323AF ; Unified_Ideograph # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + +# Total code points: 97680 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +2065 ; Other_Default_Ignorable_Code_Point # Cn +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] .. +E0000 ; Other_Default_Ignorable_Code_Point # Cn +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] .. 
+E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] .. +E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] .. + +# Total code points: 3776 + +# ================================================ + +0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR +0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL +17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET +232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET +E0001 ; Deprecated # Cf LANGUAGE TAG + +# Total code points: 15 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J +1D422..1D423 ; Soft_Dotted # 
L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J +1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK +1E04C..1E04D ; Soft_Dotted # Lm [2] MODIFIER LETTER CYRILLIC SMALL BYELORUSSIAN-UKRAINIAN I..MODIFIER LETTER CYRILLIC SMALL JE +1E068 ; Soft_Dotted # Lm CYRILLIC SUBSCRIPT SMALL LETTER BYELORUSSIAN-UKRAINIAN I + +# Total code points: 50 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; 
Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY +AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA +AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY + +# Total code points: 19 + +# ================================================ + +1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 6 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE +200C..200D ; Other_ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +30FB ; Other_ID_Continue # Po KATAKANA MIDDLE DOT +FF65 ; Other_ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 16 + +# ================================================ + +00B2..00B3 ; ID_Compat_Math_Continue # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B9 ; ID_Compat_Math_Continue # No SUPERSCRIPT ONE +2070 ; ID_Compat_Math_Continue # No SUPERSCRIPT ZERO +2074..2079 ; ID_Compat_Math_Continue # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; ID_Compat_Math_Continue # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; ID_Compat_Math_Continue # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; ID_Compat_Math_Continue # Pe SUPERSCRIPT RIGHT PARENTHESIS +2080..2089 ; ID_Compat_Math_Continue # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; ID_Compat_Math_Continue # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; ID_Compat_Math_Continue # Ps SUBSCRIPT LEFT PARENTHESIS 
+208E ; ID_Compat_Math_Continue # Pe SUBSCRIPT RIGHT PARENTHESIS +2202 ; ID_Compat_Math_Continue # Sm PARTIAL DIFFERENTIAL +2207 ; ID_Compat_Math_Continue # Sm NABLA +221E ; ID_Compat_Math_Continue # Sm INFINITY +1D6C1 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD NABLA +1D6DB ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC NABLA +1D715 ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC NABLA +1D74F ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D789 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7C3 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + +# Total code points: 43 + +# ================================================ + +2202 ; ID_Compat_Math_Start # Sm PARTIAL DIFFERENTIAL +2207 ; ID_Compat_Math_Start # Sm NABLA +221E ; ID_Compat_Math_Start # Sm INFINITY +1D6C1 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD NABLA +1D6DB ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC NABLA +1D715 ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC NABLA +1D74F ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D789 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7C3 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + +# Total code points: 13 + +# 
================================================ + +0021 ; Sentence_Terminal # Po EXCLAMATION MARK +002E ; Sentence_Terminal # Po FULL STOP +003F ; Sentence_Terminal # Po QUESTION MARK +0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP +061D..061F ; Sentence_Terminal # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +06D4 ; Sentence_Terminal # Po ARABIC FULL STOP +0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK +0837 ; Sentence_Terminal # Po SAMARITAN PUNCTUATION MELODIC QITSA +0839 ; Sentence_Terminal # Po SAMARITAN PUNCTUATION QITSA +083D..083E ; Sentence_Terminal # Po [2] SAMARITAN PUNCTUATION SOF MASHFAAT..SAMARITAN PUNCTUATION ANNAAU +0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP +1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP +1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D5 ; Sentence_Terminal # Po [2] KHMER SIGN KHAN..KHMER SIGN BARIYOOSAN +1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP +1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B4E..1B4F ; Sentence_Terminal # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN +1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1B7D..1B7F ; Sentence_Terminal # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK +1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION 
TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL +1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +2024 ; Sentence_Terminal # Po ONE DOT LEADER +203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; Sentence_Terminal # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK +2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK +2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP +2E53..2E54 ; Sentence_Terminal # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK +3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP +A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP +A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK +A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP +A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK +A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA +A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI +AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI +FE12 ; Sentence_Terminal # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; Sentence_Terminal # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE52 ; Sentence_Terminal # Po SMALL FULL STOP +FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK +FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP +FF1F ; Sentence_Terminal # Po FULLWIDTH 
QUESTION MARK +FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP +10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10F55..10F59 ; Sentence_Terminal # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Sentence_Terminal # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA +110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK +111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA +1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK +112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK +113D4..113D5 ; Sentence_Terminal # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA +115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA +115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11944 ; Sentence_Terminal # Po DIVES AKURU DOUBLE DANDA +11946 ; Sentence_Terminal # Po DIVES AKURU END OF TEXT MARK +11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI 
DOUBLE DANDA +11EF7..11EF8 ; Sentence_Terminal # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F43..11F44 ; Sentence_Terminal # Po [2] KAWI DANDA..KAWI DOUBLE DANDA +16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP +16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB +16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS +16D6E..16D6F ; Sentence_Terminal # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA +16E98 ; Sentence_Terminal # Po MEDEFAIDRIN FULL STOP +1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP + +# Total code points: 170 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Variation_Selector # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 260 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] .. 
+0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; 
Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED 
ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Pattern_Syntax # Ps LEFT CEILING +2309 ; Pattern_Syntax # Pe RIGHT CEILING +230A ; Pattern_Syntax # Ps LEFT FLOOR +230B ; Pattern_Syntax # Pe RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET 
+23E2..2429 ; Pattern_Syntax # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM +242A..243F ; Pattern_Syntax # Cn [22] .. +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] .. +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794..27BF ; Pattern_Syntax # So [44] HEAVY 
WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE 
BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B74..2B75 ; Pattern_Syntax # Cn [2] .. 
+2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B96 ; Pattern_Syntax # Cn +2B97..2BFF ; Pattern_Syntax # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS +2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET +2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET +2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Pattern_Syntax # 
Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Pattern_Syntax # Lm VERTICAL TILDE +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN +2E41 ; Pattern_Syntax # Po REVERSED COMMA +2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; Pattern_Syntax # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; Pattern_Syntax # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Pattern_Syntax # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Pattern_Syntax # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Pattern_Syntax # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Pattern_Syntax # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; Pattern_Syntax # Pd OBLIQUE HYPHEN +2E5E..2E7F ; Pattern_Syntax # Cn [34] .. 
+3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 + +# ================================================ + +0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH +070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK +0890..0891 ; Prepended_Concatenation_Mark # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; 
Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH +110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN +110CD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN ABOVE + +# Total code points: 13 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# ================================================ + +0654..0655 ; Modifier_Combining_Mark # Mn [2] ARABIC HAMZA ABOVE..ARABIC HAMZA BELOW +0658 ; Modifier_Combining_Mark # Mn ARABIC MARK NOON GHUNNA +06DC ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH SEEN +06E3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW SEEN +06E7..06E8 ; Modifier_Combining_Mark # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +08CA..08CB ; Modifier_Combining_Mark # Mn [2] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW +08CD..08CF ; Modifier_Combining_Mark # Mn [3] ARABIC SMALL HIGH ZAH..ARABIC LARGE ROUND DOT BELOW +08D3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW WAW +08F3 ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH WAW + +# Total code points: 14 + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/PropertyAliases.txt b/3rd/pcre2/maint/Unicode.tables/PropertyAliases.txt new file mode 100644 index 00000000..69dbb8d2 --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/PropertyAliases.txt @@ -0,0 +1,225 @@ +# PropertyAliases-16.0.0.txt +# Date: 2024-06-06, 21:52:48 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# This file contains aliases for properties used in the UCD. 
+# These names can be used for XML formats of UCD data, for regular-expression +# property tests, and other programmatic textual descriptions of Unicode data. +# +# The names may be translated in appropriate environments, and additional +# aliases may be useful. +# +# FORMAT +# +# Each line has two or more fields, separated by semicolons. +# +# First Field: The first field is the short name for the property. +# It is typically an abbreviation, but in a number of cases it is simply +# a duplicate of the "long name" in the second field. +# For Unihan database tags, the short name is actually a longer string than +# the tag specified in the second field. +# +# Second Field: The second field is the long name for the property, +# typically the formal name used in documentation about the property. +# +# The above are the preferred aliases. Other aliases may be listed in additional fields. +# +# Loose matching should be applied to all property names and property values, with +# the exception of String Property values. With loose matching of property names and +# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". +# +# NOTE: Property value names are NOT unique across properties. For example: +# +# AL means Arabic Letter for the Bidi_Class property, and +# AL means Above_Left for the Combining_Class property, and +# AL means Alphabetic for the Line_Break property. +# +# In addition, some property names may be the same as some property value names. +# For example: +# +# sc means the Script property, and +# Sc means the General_Category property value Currency_Symbol (Sc) +# +# The combination of property value and property name is, however, unique. +# +# For more information, see UAX #44, Unicode Character Database, and +# UTS #18, Unicode Regular Expressions. 
+# ================================================ + + +# ================================================ +# Numeric Properties +# ================================================ +cjkAccountingNumeric ; kAccountingNumeric +cjkOtherNumeric ; kOtherNumeric +cjkPrimaryNumeric ; kPrimaryNumeric +nv ; Numeric_Value + +# ================================================ +# String Properties +# ================================================ +bmg ; Bidi_Mirroring_Glyph +bpb ; Bidi_Paired_Bracket +cf ; Case_Folding +cjkCompatibilityVariant ; kCompatibilityVariant +dm ; Decomposition_Mapping +EqUIdeo ; Equivalent_Unified_Ideograph +FC_NFKC ; FC_NFKC_Closure +lc ; Lowercase_Mapping +NFKC_CF ; NFKC_Casefold +NFKC_SCF ; NFKC_Simple_Casefold +scf ; Simple_Case_Folding ; sfc +slc ; Simple_Lowercase_Mapping +stc ; Simple_Titlecase_Mapping +suc ; Simple_Uppercase_Mapping +tc ; Titlecase_Mapping +uc ; Uppercase_Mapping + +# ================================================ +# Miscellaneous Properties +# ================================================ +cjkIICore ; kIICore +cjkIRG_GSource ; kIRG_GSource +cjkIRG_HSource ; kIRG_HSource +cjkIRG_JSource ; kIRG_JSource +cjkIRG_KPSource ; kIRG_KPSource +cjkIRG_KSource ; kIRG_KSource +cjkIRG_MSource ; kIRG_MSource +cjkIRG_SSource ; kIRG_SSource +cjkIRG_TSource ; kIRG_TSource +cjkIRG_UKSource ; kIRG_UKSource +cjkIRG_USource ; kIRG_USource +cjkIRG_VSource ; kIRG_VSource +cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS +isc ; ISO_Comment +JSN ; Jamo_Short_Name +kEH_Cat ; kEH_Cat +kEH_Desc ; kEH_Desc +kEH_HG ; kEH_HG +kEH_IFAO ; kEH_IFAO +kEH_JSesh ; kEH_JSesh +na ; Name +na1 ; Unicode_1_Name +Name_Alias ; Name_Alias +scx ; Script_Extensions + +# ================================================ +# Catalog Properties +# ================================================ +age ; Age +blk ; Block +sc ; Script + +# ================================================ +# Enumerated Properties +# ================================================ 
+bc ; Bidi_Class +bpt ; Bidi_Paired_Bracket_Type +ccc ; Canonical_Combining_Class +dt ; Decomposition_Type +ea ; East_Asian_Width +gc ; General_Category +GCB ; Grapheme_Cluster_Break +hst ; Hangul_Syllable_Type +InCB ; Indic_Conjunct_Break +InPC ; Indic_Positional_Category +InSC ; Indic_Syllabic_Category +jg ; Joining_Group +jt ; Joining_Type +lb ; Line_Break +NFC_QC ; NFC_Quick_Check +NFD_QC ; NFD_Quick_Check +NFKC_QC ; NFKC_Quick_Check +NFKD_QC ; NFKD_Quick_Check +nt ; Numeric_Type +SB ; Sentence_Break +vo ; Vertical_Orientation +WB ; Word_Break + +# ================================================ +# Binary Properties +# ================================================ +AHex ; ASCII_Hex_Digit +Alpha ; Alphabetic +Bidi_C ; Bidi_Control +Bidi_M ; Bidi_Mirrored +Cased ; Cased +CE ; Composition_Exclusion +CI ; Case_Ignorable +Comp_Ex ; Full_Composition_Exclusion +CWCF ; Changes_When_Casefolded +CWCM ; Changes_When_Casemapped +CWKCF ; Changes_When_NFKC_Casefolded +CWL ; Changes_When_Lowercased +CWT ; Changes_When_Titlecased +CWU ; Changes_When_Uppercased +Dash ; Dash +Dep ; Deprecated +DI ; Default_Ignorable_Code_Point +Dia ; Diacritic +EBase ; Emoji_Modifier_Base +EComp ; Emoji_Component +EMod ; Emoji_Modifier +Emoji ; Emoji +EPres ; Emoji_Presentation +Ext ; Extender +ExtPict ; Extended_Pictographic +Gr_Base ; Grapheme_Base +Gr_Ext ; Grapheme_Extend +Gr_Link ; Grapheme_Link +Hex ; Hex_Digit +Hyphen ; Hyphen +ID_Compat_Math_Continue ; ID_Compat_Math_Continue +ID_Compat_Math_Start ; ID_Compat_Math_Start +IDC ; ID_Continue +Ideo ; Ideographic +IDS ; ID_Start +IDSB ; IDS_Binary_Operator +IDST ; IDS_Trinary_Operator +IDSU ; IDS_Unary_Operator +Join_C ; Join_Control +kEH_NoMirror ; kEH_NoMirror +kEH_NoRotate ; kEH_NoRotate +LOE ; Logical_Order_Exception +Lower ; Lowercase +Math ; Math +MCM ; Modifier_Combining_Mark +NChar ; Noncharacter_Code_Point +OAlpha ; Other_Alphabetic +ODI ; Other_Default_Ignorable_Code_Point +OGr_Ext ; Other_Grapheme_Extend +OIDC ; 
Other_ID_Continue +OIDS ; Other_ID_Start +OLower ; Other_Lowercase +OMath ; Other_Math +OUpper ; Other_Uppercase +Pat_Syn ; Pattern_Syntax +Pat_WS ; Pattern_White_Space +PCM ; Prepended_Concatenation_Mark +QMark ; Quotation_Mark +Radical ; Radical +RI ; Regional_Indicator +SD ; Soft_Dotted +STerm ; Sentence_Terminal +Term ; Terminal_Punctuation +UIdeo ; Unified_Ideograph +Upper ; Uppercase +VS ; Variation_Selector +WSpace ; White_Space ; space +XIDC ; XID_Continue +XIDS ; XID_Start +XO_NFC ; Expands_On_NFC +XO_NFD ; Expands_On_NFD +XO_NFKC ; Expands_On_NFKC +XO_NFKD ; Expands_On_NFKD + +# ================================================ +# Total: 142 + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/PropertyValueAliases.txt b/3rd/pcre2/maint/Unicode.tables/PropertyValueAliases.txt new file mode 100644 index 00000000..01c6f659 --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/PropertyValueAliases.txt @@ -0,0 +1,1708 @@ +# PropertyValueAliases-16.0.0.txt +# Date: 2024-07-30, 19:59:00 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# This file contains aliases for property values used in the UCD. +# These names can be used for XML formats of UCD data, for regular-expression +# property tests, and other programmatic textual descriptions of Unicode data. +# +# The names may be translated in appropriate environments, and additional +# aliases may be useful. +# +# FORMAT +# +# Each line describes a property value name. +# This consists of three or more fields, separated by semicolons. +# +# First Field: The first field describes the property for which that +# property value name is used. +# +# Second Field: The second field is the short name for the property value. 
+# It is typically an abbreviation, but in a number of cases it is simply +# a duplicate of the "long name" in the third field. +# +# Third Field: The third field is the long name for the property value, +# typically the formal name used in documentation about the property value. +# +# In the case of Canonical_Combining_Class (ccc), there are 4 fields: +# The second field is numeric, the third is the short name, and the fourth is the long name. +# +# The above are the preferred aliases. Other aliases may be listed in additional fields. +# +# Loose matching should be applied to all property names and property values, with +# the exception of String Property values. With loose matching of property names and +# values, the case distinctions, whitespace, hyphens, and '_' are ignored. +# For Numeric Property values, numeric equivalence is applied: thus "01.00" +# is equivalent to "1". +# +# NOTE: Property value names are NOT unique across properties. For example: +# +# AL means Arabic Letter for the Bidi_Class property, and +# AL means Above_Left for the Canonical_Combining_Class property, and +# AL means Alphabetic for the Line_Break property. +# +# In addition, some property names may be the same as some property value names. +# For example: +# +# sc means the Script property, and +# Sc means the General_Category property value Currency_Symbol (Sc) +# +# The combination of property value and property name is, however, unique. +# +# For more information, see UAX #44, Unicode Character Database, and +# UTS #18, Unicode Regular Expressions. 
+# ================================================ + + +# ASCII_Hex_Digit (AHex) + +AHex; N ; No ; F ; False +AHex; Y ; Yes ; T ; True + +# Age (age) + +age; 1.1 ; V1_1 +age; 2.0 ; V2_0 +age; 2.1 ; V2_1 +age; 3.0 ; V3_0 +age; 3.1 ; V3_1 +age; 3.2 ; V3_2 +age; 4.0 ; V4_0 +age; 4.1 ; V4_1 +age; 5.0 ; V5_0 +age; 5.1 ; V5_1 +age; 5.2 ; V5_2 +age; 6.0 ; V6_0 +age; 6.1 ; V6_1 +age; 6.2 ; V6_2 +age; 6.3 ; V6_3 +age; 7.0 ; V7_0 +age; 8.0 ; V8_0 +age; 9.0 ; V9_0 +age; 10.0 ; V10_0 +age; 11.0 ; V11_0 +age; 12.0 ; V12_0 +age; 12.1 ; V12_1 +age; 13.0 ; V13_0 +age; 14.0 ; V14_0 +age; 15.0 ; V15_0 +age; 15.1 ; V15_1 +age; 16.0 ; V16_0 +age; NA ; Unassigned + +# Alphabetic (Alpha) + +Alpha; N ; No ; F ; False +Alpha; Y ; Yes ; T ; True + +# Bidi_Class (bc) + +bc ; AL ; Arabic_Letter +bc ; AN ; Arabic_Number +bc ; B ; Paragraph_Separator +bc ; BN ; Boundary_Neutral +bc ; CS ; Common_Separator +bc ; EN ; European_Number +bc ; ES ; European_Separator +bc ; ET ; European_Terminator +bc ; FSI ; First_Strong_Isolate +bc ; L ; Left_To_Right +bc ; LRE ; Left_To_Right_Embedding +bc ; LRI ; Left_To_Right_Isolate +bc ; LRO ; Left_To_Right_Override +bc ; NSM ; Nonspacing_Mark +bc ; ON ; Other_Neutral +bc ; PDF ; Pop_Directional_Format +bc ; PDI ; Pop_Directional_Isolate +bc ; R ; Right_To_Left +bc ; RLE ; Right_To_Left_Embedding +bc ; RLI ; Right_To_Left_Isolate +bc ; RLO ; Right_To_Left_Override +bc ; S ; Segment_Separator +bc ; WS ; White_Space + +# Bidi_Control (Bidi_C) + +Bidi_C; N ; No ; F ; False +Bidi_C; Y ; Yes ; T ; True + +# Bidi_Mirrored (Bidi_M) + +Bidi_M; N ; No ; F ; False +Bidi_M; Y ; Yes ; T ; True + +# Bidi_Mirroring_Glyph (bmg) + + +# Bidi_Paired_Bracket (bpb) + +# @missing: 0000..10FFFF; Bidi_Paired_Bracket; + +# Bidi_Paired_Bracket_Type (bpt) + +bpt; c ; Close +bpt; n ; None +bpt; o ; Open +# @missing: 0000..10FFFF; Bidi_Paired_Bracket_Type; n + +# Block (blk) + +blk; Adlam ; Adlam +blk; Aegean_Numbers ; Aegean_Numbers +blk; Ahom ; Ahom +blk; Alchemical ; 
Alchemical_Symbols +blk; Alphabetic_PF ; Alphabetic_Presentation_Forms +blk; Anatolian_Hieroglyphs ; Anatolian_Hieroglyphs +blk; Ancient_Greek_Music ; Ancient_Greek_Musical_Notation +blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers +blk; Ancient_Symbols ; Ancient_Symbols +blk; Arabic ; Arabic +blk; Arabic_Ext_A ; Arabic_Extended_A +blk; Arabic_Ext_B ; Arabic_Extended_B +blk; Arabic_Ext_C ; Arabic_Extended_C +blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols +blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A +blk; Arabic_PF_B ; Arabic_Presentation_Forms_B +blk; Arabic_Sup ; Arabic_Supplement +blk; Armenian ; Armenian +blk; Arrows ; Arrows +blk; ASCII ; Basic_Latin +blk; Avestan ; Avestan +blk; Balinese ; Balinese +blk; Bamum ; Bamum +blk; Bamum_Sup ; Bamum_Supplement +blk; Bassa_Vah ; Bassa_Vah +blk; Batak ; Batak +blk; Bengali ; Bengali +blk; Bhaiksuki ; Bhaiksuki +blk; Block_Elements ; Block_Elements +blk; Bopomofo ; Bopomofo +blk; Bopomofo_Ext ; Bopomofo_Extended +blk; Box_Drawing ; Box_Drawing +blk; Brahmi ; Brahmi +blk; Braille ; Braille_Patterns +blk; Buginese ; Buginese +blk; Buhid ; Buhid +blk; Byzantine_Music ; Byzantine_Musical_Symbols +blk; Carian ; Carian +blk; Caucasian_Albanian ; Caucasian_Albanian +blk; Chakma ; Chakma +blk; Cham ; Cham +blk; Cherokee ; Cherokee +blk; Cherokee_Sup ; Cherokee_Supplement +blk; Chess_Symbols ; Chess_Symbols +blk; Chorasmian ; Chorasmian +blk; CJK ; CJK_Unified_Ideographs +blk; CJK_Compat ; CJK_Compatibility +blk; CJK_Compat_Forms ; CJK_Compatibility_Forms +blk; CJK_Compat_Ideographs ; CJK_Compatibility_Ideographs +blk; CJK_Compat_Ideographs_Sup ; CJK_Compatibility_Ideographs_Supplement +blk; CJK_Ext_A ; CJK_Unified_Ideographs_Extension_A +blk; CJK_Ext_B ; CJK_Unified_Ideographs_Extension_B +blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C +blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D +blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E +blk; CJK_Ext_F ; 
CJK_Unified_Ideographs_Extension_F +blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G +blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H +blk; CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I +blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement +blk; CJK_Strokes ; CJK_Strokes +blk; CJK_Symbols ; CJK_Symbols_And_Punctuation +blk; Compat_Jamo ; Hangul_Compatibility_Jamo +blk; Control_Pictures ; Control_Pictures +blk; Coptic ; Coptic +blk; Coptic_Epact_Numbers ; Coptic_Epact_Numbers +blk; Counting_Rod ; Counting_Rod_Numerals +blk; Cuneiform ; Cuneiform +blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation +blk; Currency_Symbols ; Currency_Symbols +blk; Cypriot_Syllabary ; Cypriot_Syllabary +blk; Cypro_Minoan ; Cypro_Minoan +blk; Cyrillic ; Cyrillic +blk; Cyrillic_Ext_A ; Cyrillic_Extended_A +blk; Cyrillic_Ext_B ; Cyrillic_Extended_B +blk; Cyrillic_Ext_C ; Cyrillic_Extended_C +blk; Cyrillic_Ext_D ; Cyrillic_Extended_D +blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary +blk; Deseret ; Deseret +blk; Devanagari ; Devanagari +blk; Devanagari_Ext ; Devanagari_Extended +blk; Devanagari_Ext_A ; Devanagari_Extended_A +blk; Diacriticals ; Combining_Diacritical_Marks +blk; Diacriticals_Ext ; Combining_Diacritical_Marks_Extended +blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols +blk; Diacriticals_Sup ; Combining_Diacritical_Marks_Supplement +blk; Dingbats ; Dingbats +blk; Dives_Akuru ; Dives_Akuru +blk; Dogra ; Dogra +blk; Domino ; Domino_Tiles +blk; Duployan ; Duployan +blk; Early_Dynastic_Cuneiform ; Early_Dynastic_Cuneiform +blk; Egyptian_Hieroglyph_Format_Controls; Egyptian_Hieroglyph_Format_Controls +blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs +blk; Egyptian_Hieroglyphs_Ext_A ; Egyptian_Hieroglyphs_Extended_A +blk; Elbasan ; Elbasan +blk; Elymaic ; Elymaic +blk; Emoticons ; Emoticons +blk; Enclosed_Alphanum ; Enclosed_Alphanumerics +blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement +blk; 
Enclosed_CJK ; Enclosed_CJK_Letters_And_Months +blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement +blk; Ethiopic ; Ethiopic +blk; Ethiopic_Ext ; Ethiopic_Extended +blk; Ethiopic_Ext_A ; Ethiopic_Extended_A +blk; Ethiopic_Ext_B ; Ethiopic_Extended_B +blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; Garay ; Garay +blk; Geometric_Shapes ; Geometric_Shapes +blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended +blk; Georgian ; Georgian +blk; Georgian_Ext ; Georgian_Extended +blk; Georgian_Sup ; Georgian_Supplement +blk; Glagolitic ; Glagolitic +blk; Glagolitic_Sup ; Glagolitic_Supplement +blk; Gothic ; Gothic +blk; Grantha ; Grantha +blk; Greek ; Greek_And_Coptic +blk; Greek_Ext ; Greek_Extended +blk; Gujarati ; Gujarati +blk; Gunjala_Gondi ; Gunjala_Gondi +blk; Gurmukhi ; Gurmukhi +blk; Gurung_Khema ; Gurung_Khema +blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms +blk; Half_Marks ; Combining_Half_Marks +blk; Hangul ; Hangul_Syllables +blk; Hanifi_Rohingya ; Hanifi_Rohingya +blk; Hanunoo ; Hanunoo +blk; Hatran ; Hatran +blk; Hebrew ; Hebrew +blk; High_PU_Surrogates ; High_Private_Use_Surrogates +blk; High_Surrogates ; High_Surrogates +blk; Hiragana ; Hiragana +blk; IDC ; Ideographic_Description_Characters +blk; Ideographic_Symbols ; Ideographic_Symbols_And_Punctuation +blk; Imperial_Aramaic ; Imperial_Aramaic +blk; Indic_Number_Forms ; Common_Indic_Number_Forms +blk; Indic_Siyaq_Numbers ; Indic_Siyaq_Numbers +blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi +blk; Inscriptional_Parthian ; Inscriptional_Parthian +blk; IPA_Ext ; IPA_Extensions +blk; Jamo ; Hangul_Jamo +blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A +blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B +blk; Javanese ; Javanese +blk; Kaithi ; Kaithi +blk; Kaktovik_Numerals ; Kaktovik_Numerals +blk; Kana_Ext_A ; Kana_Extended_A +blk; Kana_Ext_B ; Kana_Extended_B +blk; Kana_Sup ; Kana_Supplement +blk; Kanbun ; Kanbun +blk; Kangxi ; Kangxi_Radicals +blk; Kannada ; Kannada +blk; Katakana ; Katakana +blk; 
Katakana_Ext ; Katakana_Phonetic_Extensions +blk; Kawi ; Kawi +blk; Kayah_Li ; Kayah_Li +blk; Kharoshthi ; Kharoshthi +blk; Khitan_Small_Script ; Khitan_Small_Script +blk; Khmer ; Khmer +blk; Khmer_Symbols ; Khmer_Symbols +blk; Khojki ; Khojki +blk; Khudawadi ; Khudawadi +blk; Kirat_Rai ; Kirat_Rai +blk; Lao ; Lao +blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 +blk; Latin_Ext_A ; Latin_Extended_A +blk; Latin_Ext_Additional ; Latin_Extended_Additional +blk; Latin_Ext_B ; Latin_Extended_B +blk; Latin_Ext_C ; Latin_Extended_C +blk; Latin_Ext_D ; Latin_Extended_D +blk; Latin_Ext_E ; Latin_Extended_E +blk; Latin_Ext_F ; Latin_Extended_F +blk; Latin_Ext_G ; Latin_Extended_G +blk; Lepcha ; Lepcha +blk; Letterlike_Symbols ; Letterlike_Symbols +blk; Limbu ; Limbu +blk; Linear_A ; Linear_A +blk; Linear_B_Ideograms ; Linear_B_Ideograms +blk; Linear_B_Syllabary ; Linear_B_Syllabary +blk; Lisu ; Lisu +blk; Lisu_Sup ; Lisu_Supplement +blk; Low_Surrogates ; Low_Surrogates +blk; Lycian ; Lycian +blk; Lydian ; Lydian +blk; Mahajani ; Mahajani +blk; Mahjong ; Mahjong_Tiles +blk; Makasar ; Makasar +blk; Malayalam ; Malayalam +blk; Mandaic ; Mandaic +blk; Manichaean ; Manichaean +blk; Marchen ; Marchen +blk; Masaram_Gondi ; Masaram_Gondi +blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols +blk; Math_Operators ; Mathematical_Operators +blk; Mayan_Numerals ; Mayan_Numerals +blk; Medefaidrin ; Medefaidrin +blk; Meetei_Mayek ; Meetei_Mayek +blk; Meetei_Mayek_Ext ; Meetei_Mayek_Extensions +blk; Mende_Kikakui ; Mende_Kikakui +blk; Meroitic_Cursive ; Meroitic_Cursive +blk; Meroitic_Hieroglyphs ; Meroitic_Hieroglyphs +blk; Miao ; Miao +blk; Misc_Arrows ; Miscellaneous_Symbols_And_Arrows +blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A +blk; Misc_Math_Symbols_B ; Miscellaneous_Mathematical_Symbols_B +blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs +blk; Misc_Symbols ; Miscellaneous_Symbols +blk; Misc_Technical ; Miscellaneous_Technical +blk; Modi ; Modi 
+blk; Modifier_Letters ; Spacing_Modifier_Letters +blk; Modifier_Tone_Letters ; Modifier_Tone_Letters +blk; Mongolian ; Mongolian +blk; Mongolian_Sup ; Mongolian_Supplement +blk; Mro ; Mro +blk; Multani ; Multani +blk; Music ; Musical_Symbols +blk; Myanmar ; Myanmar +blk; Myanmar_Ext_A ; Myanmar_Extended_A +blk; Myanmar_Ext_B ; Myanmar_Extended_B +blk; Myanmar_Ext_C ; Myanmar_Extended_C +blk; Nabataean ; Nabataean +blk; Nag_Mundari ; Nag_Mundari +blk; Nandinagari ; Nandinagari +blk; NB ; No_Block +blk; New_Tai_Lue ; New_Tai_Lue +blk; Newa ; Newa +blk; NKo ; NKo +blk; Number_Forms ; Number_Forms +blk; Nushu ; Nushu +blk; Nyiakeng_Puachue_Hmong ; Nyiakeng_Puachue_Hmong +blk; OCR ; Optical_Character_Recognition +blk; Ogham ; Ogham +blk; Ol_Chiki ; Ol_Chiki +blk; Ol_Onal ; Ol_Onal +blk; Old_Hungarian ; Old_Hungarian +blk; Old_Italic ; Old_Italic +blk; Old_North_Arabian ; Old_North_Arabian +blk; Old_Permic ; Old_Permic +blk; Old_Persian ; Old_Persian +blk; Old_Sogdian ; Old_Sogdian +blk; Old_South_Arabian ; Old_South_Arabian +blk; Old_Turkic ; Old_Turkic +blk; Old_Uyghur ; Old_Uyghur +blk; Oriya ; Oriya +blk; Ornamental_Dingbats ; Ornamental_Dingbats +blk; Osage ; Osage +blk; Osmanya ; Osmanya +blk; Ottoman_Siyaq_Numbers ; Ottoman_Siyaq_Numbers +blk; Pahawh_Hmong ; Pahawh_Hmong +blk; Palmyrene ; Palmyrene +blk; Pau_Cin_Hau ; Pau_Cin_Hau +blk; Phags_Pa ; Phags_Pa +blk; Phaistos ; Phaistos_Disc +blk; Phoenician ; Phoenician +blk; Phonetic_Ext ; Phonetic_Extensions +blk; Phonetic_Ext_Sup ; Phonetic_Extensions_Supplement +blk; Playing_Cards ; Playing_Cards +blk; Psalter_Pahlavi ; Psalter_Pahlavi +blk; PUA ; Private_Use_Area ; Private_Use +blk; Punctuation ; General_Punctuation +blk; Rejang ; Rejang +blk; Rumi ; Rumi_Numeral_Symbols +blk; Runic ; Runic +blk; Samaritan ; Samaritan +blk; Saurashtra ; Saurashtra +blk; Sharada ; Sharada +blk; Shavian ; Shavian +blk; Shorthand_Format_Controls ; Shorthand_Format_Controls +blk; Siddham ; Siddham +blk; Sinhala ; Sinhala +blk; 
Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers +blk; Small_Forms ; Small_Form_Variants +blk; Small_Kana_Ext ; Small_Kana_Extension +blk; Sogdian ; Sogdian +blk; Sora_Sompeng ; Sora_Sompeng +blk; Soyombo ; Soyombo +blk; Specials ; Specials +blk; Sundanese ; Sundanese +blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sunuwar ; Sunuwar +blk; Sup_Arrows_A ; Supplemental_Arrows_A +blk; Sup_Arrows_B ; Supplemental_Arrows_B +blk; Sup_Arrows_C ; Supplemental_Arrows_C +blk; Sup_Math_Operators ; Supplemental_Mathematical_Operators +blk; Sup_PUA_A ; Supplementary_Private_Use_Area_A +blk; Sup_PUA_B ; Supplementary_Private_Use_Area_B +blk; Sup_Punctuation ; Supplemental_Punctuation +blk; Sup_Symbols_And_Pictographs ; Supplemental_Symbols_And_Pictographs +blk; Super_And_Sub ; Superscripts_And_Subscripts +blk; Sutton_SignWriting ; Sutton_SignWriting +blk; Syloti_Nagri ; Syloti_Nagri +blk; Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A +blk; Symbols_For_Legacy_Computing ; Symbols_For_Legacy_Computing +blk; Symbols_For_Legacy_Computing_Sup ; Symbols_For_Legacy_Computing_Supplement +blk; Syriac ; Syriac +blk; Syriac_Sup ; Syriac_Supplement +blk; Tagalog ; Tagalog +blk; Tagbanwa ; Tagbanwa +blk; Tags ; Tags +blk; Tai_Le ; Tai_Le +blk; Tai_Tham ; Tai_Tham +blk; Tai_Viet ; Tai_Viet +blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +blk; Takri ; Takri +blk; Tamil ; Tamil +blk; Tamil_Sup ; Tamil_Supplement +blk; Tangsa ; Tangsa +blk; Tangut ; Tangut +blk; Tangut_Components ; Tangut_Components +blk; Tangut_Sup ; Tangut_Supplement +blk; Telugu ; Telugu +blk; Thaana ; Thaana +blk; Thai ; Thai +blk; Tibetan ; Tibetan +blk; Tifinagh ; Tifinagh +blk; Tirhuta ; Tirhuta +blk; Todhri ; Todhri +blk; Toto ; Toto +blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; Tulu_Tigalari ; Tulu_Tigalari +blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics +blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended +blk; UCAS_Ext_A ; 
Unified_Canadian_Aboriginal_Syllabics_Extended_A +blk; Ugaritic ; Ugaritic +blk; Vai ; Vai +blk; Vedic_Ext ; Vedic_Extensions +blk; Vertical_Forms ; Vertical_Forms +blk; Vithkuqi ; Vithkuqi +blk; VS ; Variation_Selectors +blk; VS_Sup ; Variation_Selectors_Supplement +blk; Wancho ; Wancho +blk; Warang_Citi ; Warang_Citi +blk; Yezidi ; Yezidi +blk; Yi_Radicals ; Yi_Radicals +blk; Yi_Syllables ; Yi_Syllables +blk; Yijing ; Yijing_Hexagram_Symbols +blk; Zanabazar_Square ; Zanabazar_Square +blk; Znamenny_Music ; Znamenny_Musical_Notation + +# Canonical_Combining_Class (ccc) + +ccc; 0; NR ; Not_Reordered +ccc; 1; OV ; Overlay +ccc; 6; HANR ; Han_Reading +ccc; 7; NK ; Nukta +ccc; 8; KV ; Kana_Voicing +ccc; 9; VR ; Virama +ccc; 10; CCC10 ; CCC10 +ccc; 11; CCC11 ; CCC11 +ccc; 12; CCC12 ; CCC12 +ccc; 13; CCC13 ; CCC13 +ccc; 14; CCC14 ; CCC14 +ccc; 15; CCC15 ; CCC15 +ccc; 16; CCC16 ; CCC16 +ccc; 17; CCC17 ; CCC17 +ccc; 18; CCC18 ; CCC18 +ccc; 19; CCC19 ; CCC19 +ccc; 20; CCC20 ; CCC20 +ccc; 21; CCC21 ; CCC21 +ccc; 22; CCC22 ; CCC22 +ccc; 23; CCC23 ; CCC23 +ccc; 24; CCC24 ; CCC24 +ccc; 25; CCC25 ; CCC25 +ccc; 26; CCC26 ; CCC26 +ccc; 27; CCC27 ; CCC27 +ccc; 28; CCC28 ; CCC28 +ccc; 29; CCC29 ; CCC29 +ccc; 30; CCC30 ; CCC30 +ccc; 31; CCC31 ; CCC31 +ccc; 32; CCC32 ; CCC32 +ccc; 33; CCC33 ; CCC33 +ccc; 34; CCC34 ; CCC34 +ccc; 35; CCC35 ; CCC35 +ccc; 36; CCC36 ; CCC36 +ccc; 84; CCC84 ; CCC84 +ccc; 91; CCC91 ; CCC91 +ccc; 103; CCC103 ; CCC103 +ccc; 107; CCC107 ; CCC107 +ccc; 118; CCC118 ; CCC118 +ccc; 122; CCC122 ; CCC122 +ccc; 129; CCC129 ; CCC129 +ccc; 130; CCC130 ; CCC130 +ccc; 132; CCC132 ; CCC132 +ccc; 133; CCC133 ; CCC133 # RESERVED +ccc; 200; ATBL ; Attached_Below_Left +ccc; 202; ATB ; Attached_Below +ccc; 214; ATA ; Attached_Above +ccc; 216; ATAR ; Attached_Above_Right +ccc; 218; BL ; Below_Left +ccc; 220; B ; Below +ccc; 222; BR ; Below_Right +ccc; 224; L ; Left +ccc; 226; R ; Right +ccc; 228; AL ; Above_Left +ccc; 230; A ; Above +ccc; 232; AR ; Above_Right +ccc; 233; DB ; 
Double_Below +ccc; 234; DA ; Double_Above +ccc; 240; IS ; Iota_Subscript + +# Case_Folding (cf) + +# @missing: 0000..10FFFF; Case_Folding; + +# Case_Ignorable (CI) + +CI ; N ; No ; F ; False +CI ; Y ; Yes ; T ; True + +# Cased (Cased) + +Cased; N ; No ; F ; False +Cased; Y ; Yes ; T ; True + +# Changes_When_Casefolded (CWCF) + +CWCF; N ; No ; F ; False +CWCF; Y ; Yes ; T ; True + +# Changes_When_Casemapped (CWCM) + +CWCM; N ; No ; F ; False +CWCM; Y ; Yes ; T ; True + +# Changes_When_Lowercased (CWL) + +CWL; N ; No ; F ; False +CWL; Y ; Yes ; T ; True + +# Changes_When_NFKC_Casefolded (CWKCF) + +CWKCF; N ; No ; F ; False +CWKCF; Y ; Yes ; T ; True + +# Changes_When_Titlecased (CWT) + +CWT; N ; No ; F ; False +CWT; Y ; Yes ; T ; True + +# Changes_When_Uppercased (CWU) + +CWU; N ; No ; F ; False +CWU; Y ; Yes ; T ; True + +# Composition_Exclusion (CE) + +CE ; N ; No ; F ; False +CE ; Y ; Yes ; T ; True + +# Dash (Dash) + +Dash; N ; No ; F ; False +Dash; Y ; Yes ; T ; True + +# Decomposition_Mapping (dm) + +# @missing: 0000..10FFFF; Decomposition_Mapping; + +# Decomposition_Type (dt) + +dt ; Can ; Canonical ; can +dt ; Com ; Compat ; com +dt ; Enc ; Circle ; enc +dt ; Fin ; Final ; fin +dt ; Font ; Font ; font +dt ; Fra ; Fraction ; fra +dt ; Init ; Initial ; init +dt ; Iso ; Isolated ; iso +dt ; Med ; Medial ; med +dt ; Nar ; Narrow ; nar +dt ; Nb ; Nobreak ; nb +dt ; None ; None ; none +dt ; Sml ; Small ; sml +dt ; Sqr ; Square ; sqr +dt ; Sub ; Sub ; sub +dt ; Sup ; Super ; sup +dt ; Vert ; Vertical ; vert +dt ; Wide ; Wide ; wide + +# Default_Ignorable_Code_Point (DI) + +DI ; N ; No ; F ; False +DI ; Y ; Yes ; T ; True + +# Deprecated (Dep) + +Dep; N ; No ; F ; False +Dep; Y ; Yes ; T ; True + +# Diacritic (Dia) + +Dia; N ; No ; F ; False +Dia; Y ; Yes ; T ; True + +# East_Asian_Width (ea) + +ea ; A ; Ambiguous +ea ; F ; Fullwidth +ea ; H ; Halfwidth +ea ; N ; Neutral +ea ; Na ; Narrow +ea ; W ; Wide + +# Emoji (Emoji) + +Emoji; N ; No ; F ; False +Emoji; Y ; Yes 
; T ; True + +# Emoji_Component (EComp) + +EComp; N ; No ; F ; False +EComp; Y ; Yes ; T ; True + +# Emoji_Modifier (EMod) + +EMod; N ; No ; F ; False +EMod; Y ; Yes ; T ; True + +# Emoji_Modifier_Base (EBase) + +EBase; N ; No ; F ; False +EBase; Y ; Yes ; T ; True + +# Emoji_Presentation (EPres) + +EPres; N ; No ; F ; False +EPres; Y ; Yes ; T ; True + +# Equivalent_Unified_Ideograph (EqUIdeo) + + +# Expands_On_NFC (XO_NFC) + +XO_NFC; N ; No ; F ; False +XO_NFC; Y ; Yes ; T ; True + +# Expands_On_NFD (XO_NFD) + +XO_NFD; N ; No ; F ; False +XO_NFD; Y ; Yes ; T ; True + +# Expands_On_NFKC (XO_NFKC) + +XO_NFKC; N ; No ; F ; False +XO_NFKC; Y ; Yes ; T ; True + +# Expands_On_NFKD (XO_NFKD) + +XO_NFKD; N ; No ; F ; False +XO_NFKD; Y ; Yes ; T ; True + +# Extended_Pictographic (ExtPict) + +ExtPict; N ; No ; F ; False +ExtPict; Y ; Yes ; T ; True + +# Extender (Ext) + +Ext; N ; No ; F ; False +Ext; Y ; Yes ; T ; True + +# FC_NFKC_Closure (FC_NFKC) + +# @missing: 0000..10FFFF; FC_NFKC_Closure; + +# Full_Composition_Exclusion (Comp_Ex) + +Comp_Ex; N ; No ; F ; False +Comp_Ex; Y ; Yes ; T ; True + +# General_Category (gc) + +gc ; C ; Other # Cc | Cf | Cn | Co | Cs +gc ; Cc ; Control ; cntrl +gc ; Cf ; Format +gc ; Cn ; Unassigned +gc ; Co ; Private_Use +gc ; Cs ; Surrogate +gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu +gc ; LC ; Cased_Letter # Ll | Lt | Lu +gc ; Ll ; Lowercase_Letter +gc ; Lm ; Modifier_Letter +gc ; Lo ; Other_Letter +gc ; Lt ; Titlecase_Letter +gc ; Lu ; Uppercase_Letter +gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn +gc ; Mc ; Spacing_Mark +gc ; Me ; Enclosing_Mark +gc ; Mn ; Nonspacing_Mark +gc ; N ; Number # Nd | Nl | No +gc ; Nd ; Decimal_Number ; digit +gc ; Nl ; Letter_Number +gc ; No ; Other_Number +gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps +gc ; Pc ; Connector_Punctuation +gc ; Pd ; Dash_Punctuation +gc ; Pe ; Close_Punctuation +gc ; Pf ; Final_Punctuation +gc ; Pi ; Initial_Punctuation +gc ; Po ; Other_Punctuation +gc ; Ps ; 
Open_Punctuation +gc ; S ; Symbol # Sc | Sk | Sm | So +gc ; Sc ; Currency_Symbol +gc ; Sk ; Modifier_Symbol +gc ; Sm ; Math_Symbol +gc ; So ; Other_Symbol +gc ; Z ; Separator # Zl | Zp | Zs +gc ; Zl ; Line_Separator +gc ; Zp ; Paragraph_Separator +gc ; Zs ; Space_Separator +# @missing: 0000..10FFFF; General_Category; Unassigned + +# Grapheme_Base (Gr_Base) + +Gr_Base; N ; No ; F ; False +Gr_Base; Y ; Yes ; T ; True + +# Grapheme_Cluster_Break (GCB) + +GCB; CN ; Control +GCB; CR ; CR +GCB; EB ; E_Base +GCB; EBG ; E_Base_GAZ +GCB; EM ; E_Modifier +GCB; EX ; Extend +GCB; GAZ ; Glue_After_Zwj +GCB; L ; L +GCB; LF ; LF +GCB; LV ; LV +GCB; LVT ; LVT +GCB; PP ; Prepend +GCB; RI ; Regional_Indicator +GCB; SM ; SpacingMark +GCB; T ; T +GCB; V ; V +GCB; XX ; Other +GCB; ZWJ ; ZWJ + +# Grapheme_Extend (Gr_Ext) + +Gr_Ext; N ; No ; F ; False +Gr_Ext; Y ; Yes ; T ; True + +# Grapheme_Link (Gr_Link) + +Gr_Link; N ; No ; F ; False +Gr_Link; Y ; Yes ; T ; True + +# Hangul_Syllable_Type (hst) + +hst; L ; Leading_Jamo +hst; LV ; LV_Syllable +hst; LVT ; LVT_Syllable +hst; NA ; Not_Applicable +hst; T ; Trailing_Jamo +hst; V ; Vowel_Jamo + +# Hex_Digit (Hex) + +Hex; N ; No ; F ; False +Hex; Y ; Yes ; T ; True + +# Hyphen (Hyphen) + +Hyphen; N ; No ; F ; False +Hyphen; Y ; Yes ; T ; True + +# IDS_Binary_Operator (IDSB) + +IDSB; N ; No ; F ; False +IDSB; Y ; Yes ; T ; True + +# IDS_Trinary_Operator (IDST) + +IDST; N ; No ; F ; False +IDST; Y ; Yes ; T ; True + +# IDS_Unary_Operator (IDSU) + +IDSU; N ; No ; F ; False +IDSU; Y ; Yes ; T ; True + +# ID_Compat_Math_Continue (ID_Compat_Math_Continue) + +ID_Compat_Math_Continue; N ; No ; F ; False +ID_Compat_Math_Continue; Y ; Yes ; T ; True + +# ID_Compat_Math_Start (ID_Compat_Math_Start) + +ID_Compat_Math_Start; N ; No ; F ; False +ID_Compat_Math_Start; Y ; Yes ; T ; True + +# ID_Continue (IDC) + +IDC; N ; No ; F ; False +IDC; Y ; Yes ; T ; True + +# ID_Start (IDS) + +IDS; N ; No ; F ; False +IDS; Y ; Yes ; T ; True + +# ISO_Comment (isc) + 
+# @missing: 0000..10FFFF; ISO_Comment; + +# Ideographic (Ideo) + +Ideo; N ; No ; F ; False +Ideo; Y ; Yes ; T ; True + +# Indic_Conjunct_Break (InCB) + +InCB; Consonant ; Consonant +InCB; Extend ; Extend +InCB; Linker ; Linker +InCB; None ; None + +# Indic_Positional_Category (InPC) + +InPC; Bottom ; Bottom +InPC; Bottom_And_Left ; Bottom_And_Left +InPC; Bottom_And_Right ; Bottom_And_Right +InPC; Left ; Left +InPC; Left_And_Right ; Left_And_Right +InPC; NA ; NA +InPC; Overstruck ; Overstruck +InPC; Right ; Right +InPC; Top ; Top +InPC; Top_And_Bottom ; Top_And_Bottom +InPC; Top_And_Bottom_And_Left ; Top_And_Bottom_And_Left +InPC; Top_And_Bottom_And_Right ; Top_And_Bottom_And_Right +InPC; Top_And_Left ; Top_And_Left +InPC; Top_And_Left_And_Right ; Top_And_Left_And_Right +InPC; Top_And_Right ; Top_And_Right +InPC; Visual_Order_Left ; Visual_Order_Left + +# Indic_Syllabic_Category (InSC) + +InSC; Avagraha ; Avagraha +InSC; Bindu ; Bindu +InSC; Brahmi_Joining_Number ; Brahmi_Joining_Number +InSC; Cantillation_Mark ; Cantillation_Mark +InSC; Consonant ; Consonant +InSC; Consonant_Dead ; Consonant_Dead +InSC; Consonant_Final ; Consonant_Final +InSC; Consonant_Head_Letter ; Consonant_Head_Letter +InSC; Consonant_Initial_Postfixed ; Consonant_Initial_Postfixed +InSC; Consonant_Killer ; Consonant_Killer +InSC; Consonant_Medial ; Consonant_Medial +InSC; Consonant_Placeholder ; Consonant_Placeholder +InSC; Consonant_Preceding_Repha ; Consonant_Preceding_Repha +InSC; Consonant_Prefixed ; Consonant_Prefixed +InSC; Consonant_Subjoined ; Consonant_Subjoined +InSC; Consonant_Succeeding_Repha ; Consonant_Succeeding_Repha +InSC; Consonant_With_Stacker ; Consonant_With_Stacker +InSC; Gemination_Mark ; Gemination_Mark +InSC; Invisible_Stacker ; Invisible_Stacker +InSC; Joiner ; Joiner +InSC; Modifying_Letter ; Modifying_Letter +InSC; Non_Joiner ; Non_Joiner +InSC; Nukta ; Nukta +InSC; Number ; Number +InSC; Number_Joiner ; Number_Joiner +InSC; Other ; Other +InSC; Pure_Killer ; 
Pure_Killer +InSC; Register_Shifter ; Register_Shifter +InSC; Reordering_Killer ; Reordering_Killer +InSC; Syllable_Modifier ; Syllable_Modifier +InSC; Tone_Letter ; Tone_Letter +InSC; Tone_Mark ; Tone_Mark +InSC; Virama ; Virama +InSC; Visarga ; Visarga +InSC; Vowel ; Vowel +InSC; Vowel_Dependent ; Vowel_Dependent +InSC; Vowel_Independent ; Vowel_Independent + +# Jamo_Short_Name (JSN) + +JSN; A ; A +JSN; AE ; AE +JSN; B ; B +JSN; BB ; BB +JSN; BS ; BS +JSN; C ; C +JSN; D ; D +JSN; DD ; DD +JSN; E ; E +JSN; EO ; EO +JSN; EU ; EU +JSN; G ; G +JSN; GG ; GG +JSN; GS ; GS +JSN; H ; H +JSN; I ; I +JSN; J ; J +JSN; JJ ; JJ +JSN; K ; K +JSN; L ; L +JSN; LB ; LB +JSN; LG ; LG +JSN; LH ; LH +JSN; LM ; LM +JSN; LP ; LP +JSN; LS ; LS +JSN; LT ; LT +JSN; M ; M +JSN; N ; N +JSN; NG ; NG +JSN; NH ; NH +JSN; NJ ; NJ +JSN; O ; O +JSN; OE ; OE +JSN; P ; P +JSN; R ; R +JSN; S ; S +JSN; SS ; SS +JSN; T ; T +JSN; U ; U +JSN; WA ; WA +JSN; WAE ; WAE +JSN; WE ; WE +JSN; WEO ; WEO +JSN; WI ; WI +JSN; YA ; YA +JSN; YAE ; YAE +JSN; YE ; YE +JSN; YEO ; YEO +JSN; YI ; YI +JSN; YO ; YO +JSN; YU ; YU +# @missing: 0000..10FFFF; Jamo_Short_Name; + +# Join_Control (Join_C) + +Join_C; N ; No ; F ; False +Join_C; Y ; Yes ; T ; True + +# Joining_Group (jg) + +jg ; African_Feh ; African_Feh +jg ; African_Noon ; African_Noon +jg ; African_Qaf ; African_Qaf +jg ; Ain ; Ain +jg ; Alaph ; Alaph +jg ; Alef ; Alef +jg ; Beh ; Beh +jg ; Beth ; Beth +jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree +jg ; Dal ; Dal +jg ; Dalath_Rish ; Dalath_Rish +jg ; E ; E +jg ; Farsi_Yeh ; Farsi_Yeh +jg ; Fe ; Fe +jg ; Feh ; Feh +jg ; Final_Semkath ; Final_Semkath +jg ; Gaf ; Gaf +jg ; Gamal ; Gamal +jg ; Hah ; Hah +jg ; Hanifi_Rohingya_Kinna_Ya ; Hanifi_Rohingya_Kinna_Ya +jg ; Hanifi_Rohingya_Pa ; Hanifi_Rohingya_Pa +jg ; He ; He +jg ; Heh ; Heh +jg ; Heh_Goal ; Heh_Goal +jg ; Heth ; Heth +jg ; Kaf ; Kaf +jg ; Kaph ; Kaph +jg ; Kashmiri_Yeh ; Kashmiri_Yeh +jg ; Khaph ; Khaph +jg ; Knotted_Heh ; Knotted_Heh +jg ; Lam ; 
Lam +jg ; Lamadh ; Lamadh +jg ; Malayalam_Bha ; Malayalam_Bha +jg ; Malayalam_Ja ; Malayalam_Ja +jg ; Malayalam_Lla ; Malayalam_Lla +jg ; Malayalam_Llla ; Malayalam_Llla +jg ; Malayalam_Nga ; Malayalam_Nga +jg ; Malayalam_Nna ; Malayalam_Nna +jg ; Malayalam_Nnna ; Malayalam_Nnna +jg ; Malayalam_Nya ; Malayalam_Nya +jg ; Malayalam_Ra ; Malayalam_Ra +jg ; Malayalam_Ssa ; Malayalam_Ssa +jg ; Malayalam_Tta ; Malayalam_Tta +jg ; Manichaean_Aleph ; Manichaean_Aleph +jg ; Manichaean_Ayin ; Manichaean_Ayin +jg ; Manichaean_Beth ; Manichaean_Beth +jg ; Manichaean_Daleth ; Manichaean_Daleth +jg ; Manichaean_Dhamedh ; Manichaean_Dhamedh +jg ; Manichaean_Five ; Manichaean_Five +jg ; Manichaean_Gimel ; Manichaean_Gimel +jg ; Manichaean_Heth ; Manichaean_Heth +jg ; Manichaean_Hundred ; Manichaean_Hundred +jg ; Manichaean_Kaph ; Manichaean_Kaph +jg ; Manichaean_Lamedh ; Manichaean_Lamedh +jg ; Manichaean_Mem ; Manichaean_Mem +jg ; Manichaean_Nun ; Manichaean_Nun +jg ; Manichaean_One ; Manichaean_One +jg ; Manichaean_Pe ; Manichaean_Pe +jg ; Manichaean_Qoph ; Manichaean_Qoph +jg ; Manichaean_Resh ; Manichaean_Resh +jg ; Manichaean_Sadhe ; Manichaean_Sadhe +jg ; Manichaean_Samekh ; Manichaean_Samekh +jg ; Manichaean_Taw ; Manichaean_Taw +jg ; Manichaean_Ten ; Manichaean_Ten +jg ; Manichaean_Teth ; Manichaean_Teth +jg ; Manichaean_Thamedh ; Manichaean_Thamedh +jg ; Manichaean_Twenty ; Manichaean_Twenty +jg ; Manichaean_Waw ; Manichaean_Waw +jg ; Manichaean_Yodh ; Manichaean_Yodh +jg ; Manichaean_Zayin ; Manichaean_Zayin +jg ; Meem ; Meem +jg ; Mim ; Mim +jg ; No_Joining_Group ; No_Joining_Group +jg ; Noon ; Noon +jg ; Nun ; Nun +jg ; Nya ; Nya +jg ; Pe ; Pe +jg ; Qaf ; Qaf +jg ; Qaph ; Qaph +jg ; Reh ; Reh +jg ; Reversed_Pe ; Reversed_Pe +jg ; Rohingya_Yeh ; Rohingya_Yeh +jg ; Sad ; Sad +jg ; Sadhe ; Sadhe +jg ; Seen ; Seen +jg ; Semkath ; Semkath +jg ; Shin ; Shin +jg ; Straight_Waw ; Straight_Waw +jg ; Swash_Kaf ; Swash_Kaf +jg ; Syriac_Waw ; Syriac_Waw +jg ; Tah ; Tah +jg ; Taw ; 
Taw +jg ; Teh_Marbuta ; Teh_Marbuta +jg ; Teh_Marbuta_Goal ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teth ; Teth +jg ; Thin_Yeh ; Thin_Yeh +jg ; Vertical_Tail ; Vertical_Tail +jg ; Waw ; Waw +jg ; Yeh ; Yeh +jg ; Yeh_Barree ; Yeh_Barree +jg ; Yeh_With_Tail ; Yeh_With_Tail +jg ; Yudh ; Yudh +jg ; Yudh_He ; Yudh_He +jg ; Zain ; Zain +jg ; Zhain ; Zhain + +# Joining_Type (jt) + +jt ; C ; Join_Causing +jt ; D ; Dual_Joining +jt ; L ; Left_Joining +jt ; R ; Right_Joining +jt ; T ; Transparent +jt ; U ; Non_Joining + +# Line_Break (lb) + +lb ; AI ; Ambiguous +lb ; AK ; Aksara +lb ; AL ; Alphabetic +lb ; AP ; Aksara_Prebase +lb ; AS ; Aksara_Start +lb ; B2 ; Break_Both +lb ; BA ; Break_After +lb ; BB ; Break_Before +lb ; BK ; Mandatory_Break +lb ; CB ; Contingent_Break +lb ; CJ ; Conditional_Japanese_Starter +lb ; CL ; Close_Punctuation +lb ; CM ; Combining_Mark +lb ; CP ; Close_Parenthesis +lb ; CR ; Carriage_Return +lb ; EB ; E_Base +lb ; EM ; E_Modifier +lb ; EX ; Exclamation +lb ; GL ; Glue +lb ; H2 ; H2 +lb ; H3 ; H3 +lb ; HL ; Hebrew_Letter +lb ; HY ; Hyphen +lb ; ID ; Ideographic +lb ; IN ; Inseparable ; Inseperable +lb ; IS ; Infix_Numeric +lb ; JL ; JL +lb ; JT ; JT +lb ; JV ; JV +lb ; LF ; Line_Feed +lb ; NL ; Next_Line +lb ; NS ; Nonstarter +lb ; NU ; Numeric +lb ; OP ; Open_Punctuation +lb ; PO ; Postfix_Numeric +lb ; PR ; Prefix_Numeric +lb ; QU ; Quotation +lb ; RI ; Regional_Indicator +lb ; SA ; Complex_Context +lb ; SG ; Surrogate +lb ; SP ; Space +lb ; SY ; Break_Symbols +lb ; VF ; Virama_Final +lb ; VI ; Virama +lb ; WJ ; Word_Joiner +lb ; XX ; Unknown +lb ; ZW ; ZWSpace +lb ; ZWJ ; ZWJ + +# Logical_Order_Exception (LOE) + +LOE; N ; No ; F ; False +LOE; Y ; Yes ; T ; True + +# Lowercase (Lower) + +Lower; N ; No ; F ; False +Lower; Y ; Yes ; T ; True + +# Lowercase_Mapping (lc) + +# @missing: 0000..10FFFF; Lowercase_Mapping; + +# Math (Math) + +Math; N ; No ; F ; False +Math; Y ; Yes ; T ; True + +# Modifier_Combining_Mark (MCM) + +MCM; N ; No ; F ; 
False +MCM; Y ; Yes ; T ; True + +# NFC_Quick_Check (NFC_QC) + +NFC_QC; M ; Maybe +NFC_QC; N ; No +NFC_QC; Y ; Yes + +# NFD_Quick_Check (NFD_QC) + +NFD_QC; N ; No +NFD_QC; Y ; Yes + +# NFKC_Casefold (NFKC_CF) + + +# NFKC_Quick_Check (NFKC_QC) + +NFKC_QC; M ; Maybe +NFKC_QC; N ; No +NFKC_QC; Y ; Yes + +# NFKC_Simple_Casefold (NFKC_SCF) + + +# NFKD_Quick_Check (NFKD_QC) + +NFKD_QC; N ; No +NFKD_QC; Y ; Yes + +# Name (na) + +# @missing: 0000..10FFFF; Name; + +# Name_Alias (Name_Alias) + +# @missing: 0000..10FFFF; Name_Alias; + +# Noncharacter_Code_Point (NChar) + +NChar; N ; No ; F ; False +NChar; Y ; Yes ; T ; True + +# Numeric_Type (nt) + +nt ; De ; Decimal +nt ; Di ; Digit +nt ; None ; None +nt ; Nu ; Numeric + +# Numeric_Value (nv) + +# @missing: 0000..10FFFF; Numeric_Value; NaN + +# Other_Alphabetic (OAlpha) + +OAlpha; N ; No ; F ; False +OAlpha; Y ; Yes ; T ; True + +# Other_Default_Ignorable_Code_Point (ODI) + +ODI; N ; No ; F ; False +ODI; Y ; Yes ; T ; True + +# Other_Grapheme_Extend (OGr_Ext) + +OGr_Ext; N ; No ; F ; False +OGr_Ext; Y ; Yes ; T ; True + +# Other_ID_Continue (OIDC) + +OIDC; N ; No ; F ; False +OIDC; Y ; Yes ; T ; True + +# Other_ID_Start (OIDS) + +OIDS; N ; No ; F ; False +OIDS; Y ; Yes ; T ; True + +# Other_Lowercase (OLower) + +OLower; N ; No ; F ; False +OLower; Y ; Yes ; T ; True + +# Other_Math (OMath) + +OMath; N ; No ; F ; False +OMath; Y ; Yes ; T ; True + +# Other_Uppercase (OUpper) + +OUpper; N ; No ; F ; False +OUpper; Y ; Yes ; T ; True + +# Pattern_Syntax (Pat_Syn) + +Pat_Syn; N ; No ; F ; False +Pat_Syn; Y ; Yes ; T ; True + +# Pattern_White_Space (Pat_WS) + +Pat_WS; N ; No ; F ; False +Pat_WS; Y ; Yes ; T ; True + +# Prepended_Concatenation_Mark (PCM) + +PCM; N ; No ; F ; False +PCM; Y ; Yes ; T ; True + +# Quotation_Mark (QMark) + +QMark; N ; No ; F ; False +QMark; Y ; Yes ; T ; True + +# Radical (Radical) + +Radical; N ; No ; F ; False +Radical; Y ; Yes ; T ; True + +# Regional_Indicator (RI) + +RI ; N ; No ; F ; False +RI ; 
Y ; Yes ; T ; True + +# Script (sc) + +sc ; Adlm ; Adlam +sc ; Aghb ; Caucasian_Albanian +sc ; Ahom ; Ahom +sc ; Arab ; Arabic +sc ; Armi ; Imperial_Aramaic +sc ; Armn ; Armenian +sc ; Avst ; Avestan +sc ; Bali ; Balinese +sc ; Bamu ; Bamum +sc ; Bass ; Bassa_Vah +sc ; Batk ; Batak +sc ; Beng ; Bengali +sc ; Bhks ; Bhaiksuki +sc ; Bopo ; Bopomofo +sc ; Brah ; Brahmi +sc ; Brai ; Braille +sc ; Bugi ; Buginese +sc ; Buhd ; Buhid +sc ; Cakm ; Chakma +sc ; Cans ; Canadian_Aboriginal +sc ; Cari ; Carian +sc ; Cham ; Cham +sc ; Cher ; Cherokee +sc ; Chrs ; Chorasmian +sc ; Copt ; Coptic ; Qaac +sc ; Cpmn ; Cypro_Minoan +sc ; Cprt ; Cypriot +sc ; Cyrl ; Cyrillic +sc ; Deva ; Devanagari +sc ; Diak ; Dives_Akuru +sc ; Dogr ; Dogra +sc ; Dsrt ; Deseret +sc ; Dupl ; Duployan +sc ; Egyp ; Egyptian_Hieroglyphs +sc ; Elba ; Elbasan +sc ; Elym ; Elymaic +sc ; Ethi ; Ethiopic +sc ; Gara ; Garay +sc ; Geor ; Georgian +sc ; Glag ; Glagolitic +sc ; Gong ; Gunjala_Gondi +sc ; Gonm ; Masaram_Gondi +sc ; Goth ; Gothic +sc ; Gran ; Grantha +sc ; Grek ; Greek +sc ; Gujr ; Gujarati +sc ; Gukh ; Gurung_Khema +sc ; Guru ; Gurmukhi +sc ; Hang ; Hangul +sc ; Hani ; Han +sc ; Hano ; Hanunoo +sc ; Hatr ; Hatran +sc ; Hebr ; Hebrew +sc ; Hira ; Hiragana +sc ; Hluw ; Anatolian_Hieroglyphs +sc ; Hmng ; Pahawh_Hmong +sc ; Hmnp ; Nyiakeng_Puachue_Hmong +sc ; Hrkt ; Katakana_Or_Hiragana +sc ; Hung ; Old_Hungarian +sc ; Ital ; Old_Italic +sc ; Java ; Javanese +sc ; Kali ; Kayah_Li +sc ; Kana ; Katakana +sc ; Kawi ; Kawi +sc ; Khar ; Kharoshthi +sc ; Khmr ; Khmer +sc ; Khoj ; Khojki +sc ; Kits ; Khitan_Small_Script +sc ; Knda ; Kannada +sc ; Krai ; Kirat_Rai +sc ; Kthi ; Kaithi +sc ; Lana ; Tai_Tham +sc ; Laoo ; Lao +sc ; Latn ; Latin +sc ; Lepc ; Lepcha +sc ; Limb ; Limbu +sc ; Lina ; Linear_A +sc ; Linb ; Linear_B +sc ; Lisu ; Lisu +sc ; Lyci ; Lycian +sc ; Lydi ; Lydian +sc ; Mahj ; Mahajani +sc ; Maka ; Makasar +sc ; Mand ; Mandaic +sc ; Mani ; Manichaean +sc ; Marc ; Marchen +sc ; Medf ; 
Medefaidrin +sc ; Mend ; Mende_Kikakui +sc ; Merc ; Meroitic_Cursive +sc ; Mero ; Meroitic_Hieroglyphs +sc ; Mlym ; Malayalam +sc ; Modi ; Modi +sc ; Mong ; Mongolian +sc ; Mroo ; Mro +sc ; Mtei ; Meetei_Mayek +sc ; Mult ; Multani +sc ; Mymr ; Myanmar +sc ; Nagm ; Nag_Mundari +sc ; Nand ; Nandinagari +sc ; Narb ; Old_North_Arabian +sc ; Nbat ; Nabataean +sc ; Newa ; Newa +sc ; Nkoo ; Nko +sc ; Nshu ; Nushu +sc ; Ogam ; Ogham +sc ; Olck ; Ol_Chiki +sc ; Onao ; Ol_Onal +sc ; Orkh ; Old_Turkic +sc ; Orya ; Oriya +sc ; Osge ; Osage +sc ; Osma ; Osmanya +sc ; Ougr ; Old_Uyghur +sc ; Palm ; Palmyrene +sc ; Pauc ; Pau_Cin_Hau +sc ; Perm ; Old_Permic +sc ; Phag ; Phags_Pa +sc ; Phli ; Inscriptional_Pahlavi +sc ; Phlp ; Psalter_Pahlavi +sc ; Phnx ; Phoenician +sc ; Plrd ; Miao +sc ; Prti ; Inscriptional_Parthian +sc ; Rjng ; Rejang +sc ; Rohg ; Hanifi_Rohingya +sc ; Runr ; Runic +sc ; Samr ; Samaritan +sc ; Sarb ; Old_South_Arabian +sc ; Saur ; Saurashtra +sc ; Sgnw ; SignWriting +sc ; Shaw ; Shavian +sc ; Shrd ; Sharada +sc ; Sidd ; Siddham +sc ; Sind ; Khudawadi +sc ; Sinh ; Sinhala +sc ; Sogd ; Sogdian +sc ; Sogo ; Old_Sogdian +sc ; Sora ; Sora_Sompeng +sc ; Soyo ; Soyombo +sc ; Sund ; Sundanese +sc ; Sunu ; Sunuwar +sc ; Sylo ; Syloti_Nagri +sc ; Syrc ; Syriac +sc ; Tagb ; Tagbanwa +sc ; Takr ; Takri +sc ; Tale ; Tai_Le +sc ; Talu ; New_Tai_Lue +sc ; Taml ; Tamil +sc ; Tang ; Tangut +sc ; Tavt ; Tai_Viet +sc ; Telu ; Telugu +sc ; Tfng ; Tifinagh +sc ; Tglg ; Tagalog +sc ; Thaa ; Thaana +sc ; Thai ; Thai +sc ; Tibt ; Tibetan +sc ; Tirh ; Tirhuta +sc ; Tnsa ; Tangsa +sc ; Todr ; Todhri +sc ; Toto ; Toto +sc ; Tutg ; Tulu_Tigalari +sc ; Ugar ; Ugaritic +sc ; Vaii ; Vai +sc ; Vith ; Vithkuqi +sc ; Wara ; Warang_Citi +sc ; Wcho ; Wancho +sc ; Xpeo ; Old_Persian +sc ; Xsux ; Cuneiform +sc ; Yezi ; Yezidi +sc ; Yiii ; Yi +sc ; Zanb ; Zanabazar_Square +sc ; Zinh ; Inherited ; Qaai +sc ; Zyyy ; Common +sc ; Zzzz ; Unknown + +# Script_Extensions (scx) + + +# Sentence_Break (SB) + 
+SB ; AT ; ATerm +SB ; CL ; Close +SB ; CR ; CR +SB ; EX ; Extend +SB ; FO ; Format +SB ; LE ; OLetter +SB ; LF ; LF +SB ; LO ; Lower +SB ; NU ; Numeric +SB ; SC ; SContinue +SB ; SE ; Sep +SB ; SP ; Sp +SB ; ST ; STerm +SB ; UP ; Upper +SB ; XX ; Other + +# Sentence_Terminal (STerm) + +STerm; N ; No ; F ; False +STerm; Y ; Yes ; T ; True + +# Simple_Case_Folding (scf) + +# @missing: 0000..10FFFF; Simple_Case_Folding; + +# Simple_Lowercase_Mapping (slc) + +# @missing: 0000..10FFFF; Simple_Lowercase_Mapping; + +# Simple_Titlecase_Mapping (stc) + +# @missing: 0000..10FFFF; Simple_Titlecase_Mapping; + +# Simple_Uppercase_Mapping (suc) + +# @missing: 0000..10FFFF; Simple_Uppercase_Mapping; + +# Soft_Dotted (SD) + +SD ; N ; No ; F ; False +SD ; Y ; Yes ; T ; True + +# Terminal_Punctuation (Term) + +Term; N ; No ; F ; False +Term; Y ; Yes ; T ; True + +# Titlecase_Mapping (tc) + +# @missing: 0000..10FFFF; Titlecase_Mapping; + +# Unicode_1_Name (na1) + +# @missing: 0000..10FFFF; Unicode_1_Name; + +# Unified_Ideograph (UIdeo) + +UIdeo; N ; No ; F ; False +UIdeo; Y ; Yes ; T ; True + +# Uppercase (Upper) + +Upper; N ; No ; F ; False +Upper; Y ; Yes ; T ; True + +# Uppercase_Mapping (uc) + +# @missing: 0000..10FFFF; Uppercase_Mapping; + +# Variation_Selector (VS) + +VS ; N ; No ; F ; False +VS ; Y ; Yes ; T ; True + +# Vertical_Orientation (vo) + +vo ; R ; Rotated +vo ; Tr ; Transformed_Rotated +vo ; Tu ; Transformed_Upright +vo ; U ; Upright + +# White_Space (WSpace) + +WSpace; N ; No ; F ; False +WSpace; Y ; Yes ; T ; True + +# Word_Break (WB) + +WB ; CR ; CR +WB ; DQ ; Double_Quote +WB ; EB ; E_Base +WB ; EBG ; E_Base_GAZ +WB ; EM ; E_Modifier +WB ; EX ; ExtendNumLet +WB ; Extend ; Extend +WB ; FO ; Format +WB ; GAZ ; Glue_After_Zwj +WB ; HL ; Hebrew_Letter +WB ; KA ; Katakana +WB ; LE ; ALetter +WB ; LF ; LF +WB ; MB ; MidNumLet +WB ; ML ; MidLetter +WB ; MN ; MidNum +WB ; NL ; Newline +WB ; NU ; Numeric +WB ; RI ; Regional_Indicator +WB ; SQ ; Single_Quote +WB ; 
WSegSpace ; WSegSpace +WB ; XX ; Other +WB ; ZWJ ; ZWJ + +# XID_Continue (XIDC) + +XIDC; N ; No ; F ; False +XIDC; Y ; Yes ; T ; True + +# XID_Start (XIDS) + +XIDS; N ; No ; F ; False +XIDS; Y ; Yes ; T ; True + +# cjkAccountingNumeric (cjkAccountingNumeric) + +# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN + +# cjkCompatibilityVariant (cjkCompatibilityVariant) + +# @missing: 0000..10FFFF; cjkCompatibilityVariant; + +# cjkIICore (cjkIICore) + +# @missing: 0000..10FFFF; cjkIICore; + +# cjkIRG_GSource (cjkIRG_GSource) + +# @missing: 0000..10FFFF; cjkIRG_GSource; + +# cjkIRG_HSource (cjkIRG_HSource) + +# @missing: 0000..10FFFF; cjkIRG_HSource; + +# cjkIRG_JSource (cjkIRG_JSource) + +# @missing: 0000..10FFFF; cjkIRG_JSource; + +# cjkIRG_KPSource (cjkIRG_KPSource) + +# @missing: 0000..10FFFF; cjkIRG_KPSource; + +# cjkIRG_KSource (cjkIRG_KSource) + +# @missing: 0000..10FFFF; cjkIRG_KSource; + +# cjkIRG_MSource (cjkIRG_MSource) + +# @missing: 0000..10FFFF; cjkIRG_MSource; + +# cjkIRG_SSource (cjkIRG_SSource) + +# @missing: 0000..10FFFF; cjkIRG_SSource; + +# cjkIRG_TSource (cjkIRG_TSource) + +# @missing: 0000..10FFFF; cjkIRG_TSource; + +# cjkIRG_UKSource (cjkIRG_UKSource) + +# @missing: 0000..10FFFF; cjkIRG_UKSource; + +# cjkIRG_USource (cjkIRG_USource) + +# @missing: 0000..10FFFF; cjkIRG_USource; + +# cjkIRG_VSource (cjkIRG_VSource) + +# @missing: 0000..10FFFF; cjkIRG_VSource; + +# cjkOtherNumeric (cjkOtherNumeric) + +# @missing: 0000..10FFFF; cjkOtherNumeric; NaN + +# cjkPrimaryNumeric (cjkPrimaryNumeric) + +# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN + +# cjkRSUnicode (cjkRSUnicode) + +# @missing: 0000..10FFFF; cjkRSUnicode; + +# kEH_Cat (kEH_Cat) + +# @missing: 0000..10FFFF; kEH_Cat; + +# kEH_Desc (kEH_Desc) + +# @missing: 0000..10FFFF; kEH_Desc; + +# kEH_HG (kEH_HG) + +# @missing: 0000..10FFFF; kEH_HG; + +# kEH_IFAO (kEH_IFAO) + +# @missing: 0000..10FFFF; kEH_IFAO; + +# kEH_JSesh (kEH_JSesh) + +# @missing: 0000..10FFFF; kEH_JSesh; + +# kEH_NoMirror 
(kEH_NoMirror) + +kEH_NoMirror; N ; No ; F ; False +kEH_NoMirror; Y ; Yes ; T ; True + +# kEH_NoRotate (kEH_NoRotate) + +kEH_NoRotate; N ; No ; F ; False +kEH_NoRotate; Y ; Yes ; T ; True + +# EOF diff --git a/3rd/pcre2/maint/Unicode.tables/ScriptExtensions.txt b/3rd/pcre2/maint/Unicode.tables/ScriptExtensions.txt new file mode 100644 index 00000000..140901a8 --- /dev/null +++ b/3rd/pcre2/maint/Unicode.tables/ScriptExtensions.txt @@ -0,0 +1,233 @@ +# ScriptExtensions-16.0.0.txt +# Date: 2024-07-30, 19:38:00 GMT +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# The Script_Extensions property indicates which characters are commonly used +# with more than one script, but with a limited number of scripts. +# For each code point, there is one or more property values. Each such value is a Script property value. +# For more information, see: +# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ +# Especially the sections: +# https://www.unicode.org/reports/tr24/#Assignment_Script_Values +# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# +# Each Script_Extensions value in this file consists of a set +# of one or more abbreviated Script property values. The ordering of the +# values in that set is not material, but for stability in presentation +# it is given here as alphabetical. +# +# All code points not explicitly listed for Script_Extensions +# have as their value the corresponding Script property value. +# +# @missing: 0000..10FFFF;