289 lines
7.3 KiB
C++
289 lines
7.3 KiB
C++
#include "piregularexpression.h"
|
|
|
|
// clang-format off
|
|
#define PCRE2_CODE_UNIT_WIDTH 16
|
|
#include <pcre2.h>
|
|
#include <pistringlist.h>
|
|
// clang-format on
|
|
|
|
|
|
PRIVATE_DEFINITION_START(PIRegularExpression)
|
|
pcre2_code * compiled = nullptr;
|
|
PIString error_msg;
|
|
PCRE2_SIZE error_offset = 0;
|
|
|
|
pcre2_match_data * match_data = nullptr;
|
|
|
|
int capture_count = -1;
|
|
PIMap<PIString, int> named_group_index;
|
|
PIMap<int, PIString> named_group_name;
|
|
|
|
bool isCompiled() const {
|
|
return compiled && match_data;
|
|
}
|
|
void free() {
|
|
named_group_index.clear();
|
|
named_group_name.clear();
|
|
error_msg.clear();
|
|
error_offset = 0;
|
|
capture_count = -1;
|
|
if (match_data) {
|
|
pcre2_match_data_free(match_data);
|
|
match_data = nullptr;
|
|
}
|
|
if (compiled) {
|
|
pcre2_code_free(compiled);
|
|
compiled = nullptr;
|
|
}
|
|
}
|
|
|
|
PIString getNEString(const void * ptr, uint32_t max_size) {
|
|
PIString ret;
|
|
auto * cptr = (PIChar *)ptr;
|
|
uint32_t sz = 0;
|
|
while (*cptr != PIChar()) {
|
|
ret.append(*cptr);
|
|
cptr++;
|
|
if (++sz > max_size) break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
uint32_t convertOptions(Options opt) {
|
|
uint32_t ret = PCRE2_UTF | PCRE2_NO_UTF_CHECK;
|
|
// clang-format off
|
|
if (opt[CaseInsensitive ]) ret |= PCRE2_CASELESS ;
|
|
if (opt[Singleline ]) ret |= PCRE2_FIRSTLINE;
|
|
if (opt[Multiline ]) ret |= PCRE2_MULTILINE;
|
|
if (opt[InvertedGreediness]) ret |= PCRE2_UNGREEDY ;
|
|
if (opt[Extended ]) ret |= PCRE2_EXTENDED ;
|
|
// clang-format on
|
|
return ret;
|
|
}
|
|
|
|
bool compile(PIString & pat, Options opt) {
|
|
free();
|
|
if (pat.isEmpty()) return false;
|
|
auto * pat_ptr = &(pat[0]);
|
|
int error_number = 0;
|
|
compiled = pcre2_compile((PCRE2_SPTR)pat_ptr, pat.size(), convertOptions(opt), &error_number, &error_offset, nullptr);
|
|
if (!compiled) {
|
|
PIChar buffer[256];
|
|
int sz = pcre2_get_error_message(error_number, (PCRE2_UCHAR16 *)buffer, sizeof(buffer));
|
|
error_msg = PIString(buffer, sz);
|
|
return false;
|
|
// printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer);
|
|
}
|
|
error_msg.clear();
|
|
match_data = pcre2_match_data_create_from_pattern(compiled, nullptr);
|
|
|
|
uint32_t namecount = 0, name_entry_size = 0, cap_cout = 0;
|
|
PCRE2_SPTR name_table = nullptr;
|
|
pcre2_pattern_info(compiled, PCRE2_INFO_CAPTURECOUNT, &cap_cout);
|
|
pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &namecount);
|
|
pcre2_pattern_info(compiled, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
|
pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &name_table);
|
|
capture_count = cap_cout;
|
|
auto tabptr = name_table;
|
|
for (uint32_t i = 0; i < namecount; i++) {
|
|
int gnum = *(ushort *)tabptr;
|
|
PIString gname = getNEString(tabptr + 1, name_entry_size);
|
|
named_group_index[gname] = gnum;
|
|
named_group_name[gnum] = gname;
|
|
tabptr += name_entry_size;
|
|
}
|
|
|
|
return isCompiled();
|
|
}
|
|
|
|
void match(Matcher & ret) {
|
|
int rc = pcre2_match(compiled,
|
|
(PCRE2_SPTR)ret.subjectPtr(),
|
|
ret.subject->size(),
|
|
ret.start_offset,
|
|
PCRE2_NO_UTF_CHECK,
|
|
match_data,
|
|
nullptr);
|
|
ret.has_match = ret.is_error = false;
|
|
ret.groups.clear();
|
|
if (rc == PCRE2_ERROR_NOMATCH) return;
|
|
if (rc < 0) {
|
|
ret.is_error = true;
|
|
} else {
|
|
ret.has_match = true;
|
|
auto ovector = pcre2_get_ovector_pointer(match_data);
|
|
for (int i = 0; i < rc; i++) {
|
|
Matcher::Group g;
|
|
g.index = ovector[2 * i];
|
|
g.size = ovector[2 * i + 1] - ovector[2 * i];
|
|
// g.string = PIString(&(sub_ptr[g.index]), g.size);
|
|
ret.groups << g;
|
|
}
|
|
ret.start_offset = ovector[1];
|
|
}
|
|
}
|
|
|
|
PRIVATE_DEFINITION_END(PIRegularExpression)
|
|
|
|
|
|
// Constructs the expression from `pattern` and `opt`; all work is delegated to setPattern().
PIRegularExpression::PIRegularExpression(const PIString & pattern, Options opt) {
	this->setPattern(pattern, opt);
}
|
|
|
|
|
|
// Copy constructor: takes the pattern and options of `o` and compiles them for this
// object through setPattern(), rather than sharing the compiled state of `o`.
PIRegularExpression::PIRegularExpression(const PIRegularExpression & o) {
	const PIString & src_pattern = o.pat_;
	const Options src_options = o.opt_;
	setPattern(src_pattern, src_options);
}
|
|
|
|
|
|
// Copy assignment: adopts the pattern and options of `o` and recompiles them for this object.
// Self-assignment is a no-op, so the already compiled state is not freed and rebuilt.
PIRegularExpression & PIRegularExpression::operator=(const PIRegularExpression & o) {
	if (this != &o) {
		setPattern(o.pat_, o.opt_);
	}
	return *this;
}
|
|
|
|
|
|
// Destructor: releases the compiled pattern and match block held in the private data.
PIRegularExpression::~PIRegularExpression() {
	PRIVATE->free();
}
|
|
|
|
|
|
// Stores `pattern` and compiles it with the currently stored options.
// The compile result is not returned; query it with isValid().
void PIRegularExpression::setPattern(const PIString & pattern) {
	pat_ = pattern;
	// Compile from the stored copy, not from the caller's string.
	PRIVATE->compile(pat_, opt_);
}
|
|
|
|
|
|
void PIRegularExpression::setPattern(const PIString & pattern, Options opt) {
|
|
opt_ = opt;
|
|
setPattern(pattern);
|
|
}
|
|
|
|
|
|
bool PIRegularExpression::isValid() const {
|
|
return PRIVATE->isCompiled();
|
|
}
|
|
|
|
|
|
// Returns the message stored by the last failed compilation (empty if none was stored).
PIString PIRegularExpression::errorString() const {
	const PIString & message = PRIVATE->error_msg;
	return message;
}
|
|
|
|
|
|
int PIRegularExpression::errorPosition() const {
|
|
return PRIVATE->error_offset;
|
|
}
|
|
|
|
|
|
int PIRegularExpression::captureGroupsCount() const {
|
|
return PRIVATE->capture_count;
|
|
}
|
|
|
|
|
|
// Returns the names of all named capture groups, taken from the number -> name map.
PIStringList PIRegularExpression::captureGroupNames() const {
	PIStringList names = PRIVATE->named_group_name.values();
	return names;
}
|
|
|
|
|
|
// Returns the name registered for capture group number `index`.
// For a group without a name the result is whatever the map lookup yields for a
// missing key — presumably an empty string; verify against PIMap::value().
PIString PIRegularExpression::captureGroupName(int index) const {
	const auto & names_by_number = PRIVATE->named_group_name;
	return names_by_number.value(index);
}
|
|
|
|
|
|
int PIRegularExpression::captureGroupIndex(const PIString & gname) const {
|
|
return PRIVATE->named_group_index.value(gname);
|
|
}
|
|
|
|
|
|
// Creates a matcher bound to this expression that refers to the caller's `subject`
// (no copy is taken) and will start matching at `offset`. No match is performed here.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(PIString & subject, size_t offset) {
	Matcher matcher(this);
	matcher.subject = &subject;
	matcher.start_offset = offset;
	return matcher;
}
|
|
|
|
|
|
// Creates a matcher bound to this expression that keeps its own copy of `subject`
// and will start matching at `offset`. No match is performed here.
// NOTE(review): `subject` is set to the address of the local matcher's own copy; unless
// the return is elided or Matcher's copy/move re-targets the pointer, the returned
// object would refer to a destroyed string — confirm against the Matcher declaration.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(const PIString & subject, size_t offset) {
	Matcher matcher(this);
	matcher.subject_own = subject;
	matcher.subject = &matcher.subject_own;
	matcher.start_offset = offset;
	return matcher;
}
|
|
|
|
|
|
// Builds a matcher over the caller's `subject` (not copied), runs the first match
// starting at `offset` and returns the matcher holding the result.
PIRegularExpression::Matcher PIRegularExpression::match(PIString & subject, size_t offset) {
	auto matcher = makeMatcher(subject, offset);
	PRIVATE->match(matcher);
	return matcher;
}
|
|
|
|
|
|
// Builds a matcher holding its own copy of `subject`, runs the first match
// starting at `offset` and returns the matcher holding the result.
PIRegularExpression::Matcher PIRegularExpression::match(const PIString & subject, size_t offset) {
	auto matcher = makeMatcher(subject, offset);
	PRIVATE->match(matcher);
	return matcher;
}
|
|
|
|
|
|
PIRegularExpression::Matcher::Matcher(PIRegularExpression * p): parent(p) {}
|
|
|
|
|
|
// Returns the address of the first character of the subject string,
// or nullptr when no subject is attached.
PIChar * PIRegularExpression::Matcher::subjectPtr() const {
	return subject ? &(*subject)[0] : nullptr;
}
|
|
|
|
|
|
bool PIRegularExpression::Matcher::hasMatch() const {
|
|
return has_match;
|
|
}
|
|
|
|
|
|
bool PIRegularExpression::Matcher::next() {
|
|
parent->PRIVATEWB->match(*this);
|
|
return hasMatch();
|
|
}
|
|
|
|
|
|
// Returns the text of every stored group, in group order (group 0 is the whole match).
// The list is empty when no subject is attached or nothing is stored.
PIStringList PIRegularExpression::Matcher::matchedStrings() const {
	PIStringList result;
	if (!subject) return result;
	for (int i = 0; i < groups.size_s(); ++i) {
		const auto & group = groups[i];
		result << subject->mid(group.index, group.size);
	}
	return result;
}
|
|
|
|
|
|
// Returns the text captured by group `index` (0 is the whole match).
// An empty string is returned for an out-of-range index or when no subject is attached.
PIString PIRegularExpression::Matcher::matchedString(int index) const {
	const bool in_range = index >= 0 && index < groups.size_s();
	if (!in_range || !subject) return {};
	const auto & group = groups[index];
	return subject->mid(group.index, group.size);
}
|
|
|
|
|
|
int PIRegularExpression::Matcher::matchedStart(int index) const {
|
|
if (index < 0 || index >= groups.size_s()) return -1;
|
|
return groups[index].index;
|
|
}
|
|
|
|
|
|
int PIRegularExpression::Matcher::matchedSize(int index) const {
|
|
if (index < 0 || index >= groups.size_s()) return -1;
|
|
return groups[index].size;
|
|
}
|
|
|
|
|
|
// Returns the text captured by the group named `gname`.
// An unknown name is looked up with an explicit default of -1, so it yields an
// empty string instead of silently falling back to group 0 (the whole match).
PIString PIRegularExpression::Matcher::matchedString(const PIString & gname) const {
	const int group_number = parent->PRIVATEWB->named_group_index.value(gname, -1);
	return matchedString(group_number);
}
|
|
|
|
|
|
int PIRegularExpression::Matcher::matchedStart(const PIString & gname) const {
|
|
return matchedStart(parent->PRIVATEWB->named_group_index.value(gname));
|
|
}
|
|
|
|
|
|
int PIRegularExpression::Matcher::matchedSize(const PIString & gname) const {
|
|
return matchedSize(parent->PRIVATEWB->named_group_index.value(gname));
|
|
}
|