#include "piregularexpression.h"

// NOTE(review): the two angle-bracket include targets were lost in the reviewed copy of this file.
// <pcre2.h> is required by the code below; the second header is assumed - confirm against the repository.
// clang-format off
#define PCRE2_CODE_UNIT_WIDTH 16
#include <pcre2.h>
#include <stdio.h>
// clang-format on


// Private state of PIRegularExpression: the compiled PCRE2 pattern, its match data block,
// the last compilation error and the tables of named capture groups.
PRIVATE_DEFINITION_START(PIRegularExpression)
	pcre2_code * compiled = nullptr;
	PIString error_msg;
	PCRE2_SIZE error_offset = 0;
	pcre2_match_data * match_data = nullptr;
	int capture_count = -1;
	// NOTE(review): template arguments reconstructed from usage (name -> number, number -> name).
	PIMap<PIString, int> named_group_index;
	PIMap<int, PIString> named_group_name;

	// True when both the compiled pattern and its match data block exist.
	bool isCompiled() const {
		return compiled && match_data;
	}

	// Releases the PCRE2 objects and resets every field to its initial value.
	void free() {
		named_group_index.clear();
		named_group_name.clear();
		error_msg.clear();
		error_offset = 0;
		capture_count = -1;
		if (match_data) {
			pcre2_match_data_free(match_data);
			match_data = nullptr;
		}
		if (compiled) {
			pcre2_code_free(compiled);
			compiled = nullptr;
		}
	}

	// Builds a PIString from a zero-terminated sequence of 16-bit code units,
	// reading at most max_size units (the terminator is not copied).
	PIString getNEString(const void * ptr, uint32_t max_size) {
		PIString ret;
		auto * cptr = (PIChar *)ptr;
		for (uint32_t i = 0; i < max_size && cptr[i] != PIChar(); ++i) {
			ret.append(cptr[i]);
		}
		return ret;
	}

	// Translates PIRegularExpression options to PCRE2 compile flags.
	// UTF mode is always enabled and validity checking of the pattern is skipped.
	uint32_t convertOptions(Options opt) {
		uint32_t ret = PCRE2_UTF | PCRE2_NO_UTF_CHECK;
		// NOTE(review): Singleline is mapped to PCRE2_FIRSTLINE (match must start before the first newline).
		// If "dot matches newline" was intended, the PCRE2 flag is PCRE2_DOTALL - confirm against the header docs.
		// clang-format off
		if (opt[CaseInsensitive   ]) ret |= PCRE2_CASELESS ;
		if (opt[Singleline        ]) ret |= PCRE2_FIRSTLINE;
		if (opt[Multiline         ]) ret |= PCRE2_MULTILINE;
		if (opt[InvertedGreediness]) ret |= PCRE2_UNGREEDY ;
		if (opt[Extended          ]) ret |= PCRE2_EXTENDED ;
		// clang-format on
		return ret;
	}

	// Compiles "pat" with options "opt", replacing any previous state.
	// On failure stores the PCRE2 error text and offset; on success collects the
	// capture group count and the named group tables. Returns isCompiled().
	bool compile(PIString & pat, Options opt) {
		free();
		if (pat.isEmpty()) return false;
		auto * pat_ptr   = &(pat[0]);
		int error_number = 0;
		compiled = pcre2_compile((PCRE2_SPTR)pat_ptr, pat.size(), convertOptions(opt), &error_number, &error_offset, nullptr);
		if (!compiled) {
			// The buffer length is passed in code units, not bytes.
			constexpr PCRE2_SIZE buffer_len = 256;
			PIChar buffer[buffer_len];
			const int sz = pcre2_get_error_message(error_number, (PCRE2_UCHAR16 *)buffer, buffer_len);
			if (sz >= 0) {
				error_msg = PIString(buffer, sz);
			} else if (sz == PCRE2_ERROR_NOMEMORY) {
				// Message did not fit: PCRE2 leaves a truncated, zero-terminated text in the buffer.
				error_msg = getNEString(buffer, buffer_len);
			}
			// Any other negative result (unknown error number) leaves error_msg empty.
			return false;
		}
		error_msg.clear();
		match_data = pcre2_match_data_create_from_pattern(compiled, nullptr);

		uint32_t namecount = 0, name_entry_size = 0, cap_cout = 0;
		PCRE2_SPTR name_table = nullptr;
		pcre2_pattern_info(compiled, PCRE2_INFO_CAPTURECOUNT, &cap_cout);
		pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &namecount);
		pcre2_pattern_info(compiled, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
		pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &name_table);
		capture_count = cap_cout;

		// Each table entry is name_entry_size code units: the group number in the
		// first unit, followed by the zero-terminated group name.
		const uint32_t max_name_len = name_entry_size > 0 ? name_entry_size - 1 : 0;
		auto tabptr                 = name_table;
		for (uint32_t i = 0; i < namecount; i++) {
			const int gnum          = tabptr[0];
			const PIString gname    = getNEString(tabptr + 1, max_name_len);
			named_group_index[gname] = gnum;
			named_group_name[gnum]   = gname;
			tabptr += name_entry_size;
		}
		return isCompiled();
	}

	// Runs one search on the matcher's subject starting at ret.start_offset.
	// Resets ret.has_match / ret.is_error / ret.groups, then on success fills one
	// group per returned capture and moves ret.start_offset to the end of the match.
	void match(Matcher & ret) {
		// A previous successful search that produced an empty match left start_offset
		// on the very same position; searching there again would return it forever.
		const bool after_empty_match = ret.groups.size_s() > 0 && ret.groups[0].size == 0;
		ret.has_match = ret.is_error = false;
		ret.groups.clear();
		if (!ret.subject) {
			ret.is_error = true;
			return;
		}
		const PCRE2_SIZE subject_len = ret.subject->size();
		PCRE2_SPTR units             = (PCRE2_SPTR)ret.subjectPtr();
		PCRE2_SIZE start             = ret.start_offset;
		if (after_empty_match) {
			if (start >= subject_len) return; // nothing left to scan
			// Step over one character; a UTF-16 surrogate pair counts as a single character.
			const bool surrogate_pair =
				(units[start] & 0xFC00) == 0xD800 && start + 1 < subject_len && (units[start + 1] & 0xFC00) == 0xDC00;
			start += surrogate_pair ? 2 : 1;
		}
		const int rc = pcre2_match(compiled, units, subject_len, start, PCRE2_NO_UTF_CHECK, match_data, nullptr);
		if (rc == PCRE2_ERROR_NOMATCH) return;
		if (rc < 0) {
			ret.is_error = true;
			return;
		}
		ret.has_match = true;
		// ovector holds [start, end) offset pairs: pair 0 is the whole match, pair i is group i.
		auto ovector = pcre2_get_ovector_pointer(match_data);
		for (int i = 0; i < rc; i++) {
			Matcher::Group g;
			g.index = ovector[2 * i];
			g.size  = ovector[2 * i + 1] - ovector[2 * i];
			ret.groups << g;
		}
		ret.start_offset = ovector[1];
	}
PRIVATE_DEFINITION_END(PIRegularExpression)


// Creates a regular expression and compiles "pattern" with options "opt".
PIRegularExpression::PIRegularExpression(const PIString & pattern, Options opt) {
	setPattern(pattern, opt);
}


// Copies by recompiling the other object's pattern with its options.
PIRegularExpression::PIRegularExpression(const PIRegularExpression & o) {
	setPattern(o.pat_, o.opt_);
}


// Assigns by recompiling the other object's pattern with its options.
PIRegularExpression & PIRegularExpression::operator=(const PIRegularExpression & o) {
	setPattern(o.pat_, o.opt_);
	return *this;
}


// Releases the compiled pattern and match data.
PIRegularExpression::~PIRegularExpression() {
	PRIVATE->free();
}


// Stores "pattern" and compiles it with the current options.
void PIRegularExpression::setPattern(const PIString & pattern) {
	pat_ = pattern;
	PRIVATE->compile(pat_, opt_);
}


// Stores "opt", then stores and compiles "pattern".
void PIRegularExpression::setPattern(const PIString & pattern, Options opt) {
	opt_ = opt;
	setPattern(pattern);
}


// True when the current pattern was compiled successfully.
bool PIRegularExpression::isValid() const {
	return PRIVATE->isCompiled();
}


// Text of the last compilation error (empty when there is none).
PIString PIRegularExpression::errorString() const {
	return PRIVATE->error_msg;
}


// Pattern offset written by pcre2_compile for the last compilation (reset to 0 before each compile).
int PIRegularExpression::errorPosition() const {
	return PRIVATE->error_offset;
}


// Number of capture groups in the compiled pattern, or -1 when nothing is compiled.
int PIRegularExpression::captureGroupsCount() const {
	return PRIVATE->capture_count;
}


// Names of all named capture groups.
PIStringList PIRegularExpression::captureGroupNames() const {
	return PRIVATE->named_group_name.values();
}


// Name of the capture group with number "index".
PIString PIRegularExpression::captureGroupName(int index) const {
	return PRIVATE->named_group_name.value(index);
}


// Number of the capture group called "gname".
// NOTE(review): for an unknown name this presumably yields PIMap's default value (0) - confirm.
int PIRegularExpression::captureGroupIndex(const PIString & gname) const {
	return PRIVATE->named_group_index.value(gname);
}


// Creates a matcher that refers to "subject" (not copied) and starts at "offset".
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(PIString & subject, size_t offset) {
	PIRegularExpression::Matcher ret(this);
	ret.start_offset = offset;
	ret.subject      = &subject;
	return ret;
}


// Creates a matcher that keeps its own copy of "subject" and starts at "offset".
// NOTE(review): ret.subject points into ret itself; this stays valid only if the returned
// object is never copied/moved with a default copy - verify Matcher's copy/move operations.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(const PIString & subject, size_t offset) {
	PIRegularExpression::Matcher ret(this);
	ret.start_offset = offset;
	ret.subject_own  = subject;
	ret.subject      = &ret.subject_own;
	return ret;
}


// Creates a matcher on "subject" (by reference) and performs the first search.
PIRegularExpression::Matcher PIRegularExpression::match(PIString & subject, size_t offset) {
	PIRegularExpression::Matcher ret = makeMatcher(subject, offset);
	PRIVATE->match(ret);
	return ret;
}


// Creates a matcher on a copy of "subject" and performs the first search.
PIRegularExpression::Matcher PIRegularExpression::match(const PIString & subject, size_t offset) {
	PIRegularExpression::Matcher ret = makeMatcher(subject, offset);
	PRIVATE->match(ret);
	return ret;
}


// Binds the matcher to the regular expression that created it.
PIRegularExpression::Matcher::Matcher(PIRegularExpression * p): parent(p) {}


// Pointer to the first character of the subject, or nullptr when no subject is set.
PIChar * PIRegularExpression::Matcher::subjectPtr() const {
	if (!subject) return nullptr;
	return &(*subject)[0];
}


// True when the last search found a match.
bool PIRegularExpression::Matcher::hasMatch() const {
	return has_match;
}


// Searches for the next match, continuing from the end of the previous one.
bool PIRegularExpression::Matcher::next() {
	parent->PRIVATEWB->match(*this);
	return hasMatch();
}


// Text of every group of the last match (entry 0 is the whole match).
PIStringList PIRegularExpression::Matcher::matchedStrings() const {
	if (!subject) return {};
	PIStringList ret;
	for (const auto & g: groups)
		ret << subject->mid(g.index, g.size);
	return ret;
}


// Text of group "index" of the last match; empty when the index is out of range.
PIString PIRegularExpression::Matcher::matchedString(int index) const {
	if (index < 0 || index >= groups.size_s()) return {};
	if (!subject) return {};
	return subject->mid(groups[index].index, groups[index].size);
}


// Start offset of group "index" in the subject, or -1 when the index is out of range.
int PIRegularExpression::Matcher::matchedStart(int index) const {
	if (index < 0 || index >= groups.size_s()) return -1;
	return groups[index].index;
}


// Length of group "index", or -1 when the index is out of range.
int PIRegularExpression::Matcher::matchedSize(int index) const {
	if (index < 0 || index >= groups.size_s()) return -1;
	return groups[index].size;
}


// Text of the group called "gname" (looked up in the parent's name table).
PIString PIRegularExpression::Matcher::matchedString(const PIString & gname) const {
	return matchedString(parent->PRIVATEWB->named_group_index.value(gname));
}


// Start offset of the group called "gname".
int PIRegularExpression::Matcher::matchedStart(const PIString & gname) const {
	return matchedStart(parent->PRIVATEWB->named_group_index.value(gname));
}


// Length of the group called "gname".
int PIRegularExpression::Matcher::matchedSize(const PIString & gname) const {
	return matchedSize(parent->PRIVATEWB->named_group_index.value(gname));
}


// Builds a regular expression from a glob pattern via pcre2_pattern_convert.
PIRegularExpression PIRegularExpression::fromGlob(const PIString & pattern, Options opt) {
	PIRegularExpression ret;
	ret.convertFrom(pattern, PCRE2_CONVERT_GLOB, opt);
	return ret;
}


// Builds a regular expression from a POSIX basic pattern via pcre2_pattern_convert.
PIRegularExpression PIRegularExpression::fromPOSIX(const PIString & pattern, Options opt) {
	PIRegularExpression ret;
	ret.convertFrom(pattern, PCRE2_CONVERT_POSIX_BASIC, opt);
	return ret;
}


// Converts "pattern" of the given PCRE2_CONVERT_* "type" to PCRE2 syntax and compiles the result.
// An empty pattern is ignored; a conversion failure is reported through piCout and
// leaves the current pattern untouched.
void PIRegularExpression::convertFrom(const PIString & pattern, uint type, Options opt) {
	if (pattern.isEmpty()) return;
	// The cast only obtains a pointer to the first character; the pattern is not modified here.
	PIChar * cptr       = &((PIString &)pattern)[0];
	PCRE2_UCHAR * out   = nullptr;
	PCRE2_SIZE out_size = 0;
	const int rc        = pcre2_pattern_convert((PCRE2_SPTR)cptr,
	                                     pattern.size_s(),
	                                     type | PCRE2_CONVERT_UTF | PCRE2_CONVERT_NO_UTF_CHECK,
	                                     &out,
	                                     &out_size,
	                                     nullptr);
	if (rc != 0) {
		piCout << "PIRegularExpression::convertFrom error" << rc;
	} else {
		setPattern(PIString((PIChar *)out, out_size), opt);
	}
	// Safe for nullptr as well: the output buffer is owned by PCRE2.
	pcre2_converted_pattern_free(out);
}