add PIRegularExpression

This commit is contained in:
2025-08-11 14:23:29 +03:00
parent 91955d44fa
commit 654c0847b2
481 changed files with 434858 additions and 0 deletions

View File

@@ -36,6 +36,7 @@
#include "piserializationmodule.h"
#include "pistatemachinemodule.h"
#include "pisystemmodule.h"
#include "pitextmodule.h"
#include "pithreadmodule.h"
#endif // PIP_H

View File

@@ -0,0 +1,288 @@
#include "piregularexpression.h"
// clang-format off
#define PCRE2_CODE_UNIT_WIDTH 16
#include <pcre2.h>
#include <pistringlist.h>
// clang-format on
PRIVATE_DEFINITION_START(PIRegularExpression)
// Compiled pattern; nullptr until compile() succeeds.
pcre2_code * compiled = nullptr;
// Text of the last compilation error (empty when there is none).
PIString error_msg;
// Offset written by pcre2_compile() for the last compilation error.
PCRE2_SIZE error_offset = 0;
// Match data block created from the compiled pattern; used by match().
pcre2_match_data * match_data = nullptr;
// Capture group count reported by PCRE2, -1 while nothing is compiled.
int capture_count = -1;
// Named group -> group number, filled from the PCRE2 name table.
PIMap<PIString, int> named_group_index;
// Group number -> group name, filled from the PCRE2 name table.
PIMap<int, PIString> named_group_name;
// True when both the compiled pattern and its match data block exist.
bool isCompiled() const {
	return compiled != nullptr && match_data != nullptr;
}
// Releases the PCRE2 objects (if any) and resets every derived field
// back to the "nothing compiled" state.
void free() {
	if (match_data != nullptr) pcre2_match_data_free(match_data);
	match_data = nullptr;
	if (compiled != nullptr) pcre2_code_free(compiled);
	compiled = nullptr;
	capture_count = -1;
	error_offset = 0;
	error_msg.clear();
	named_group_name.clear();
	named_group_index.clear();
}
// Builds a PIString from a zero-terminated run of PIChar starting at `ptr`.
// Reading stops at the first zero character or after `max_size` characters,
// whichever comes first; a null `ptr` yields an empty string.
PIString getNEString(const void * ptr, uint32_t max_size) {
	PIString ret;
	if (!ptr) return ret;
	const auto * chars = static_cast<const PIChar *>(ptr);
	// The bound is tested before each read so that at most max_size characters are touched.
	for (uint32_t i = 0; i < max_size && chars[i] != PIChar(); ++i)
		ret.append(chars[i]);
	return ret;
}
// Translates PIRegularExpression::Options into PCRE2 compile option bits.
// PCRE2_UTF | PCRE2_NO_UTF_CHECK are always set: the pattern is treated as
// UTF-16 and PCRE2 does not validate it.
uint32_t convertOptions(Options opt) {
	uint32_t ret = PCRE2_UTF | PCRE2_NO_UTF_CHECK;
	if (opt[CaseInsensitive]) ret |= PCRE2_CASELESS;
	// "Single line" mode (Perl /s) means '.' also matches newlines, which is PCRE2_DOTALL.
	// PCRE2_FIRSTLINE is unrelated: it restricts where an unanchored match may start.
	if (opt[Singleline]) ret |= PCRE2_DOTALL;
	if (opt[Multiline]) ret |= PCRE2_MULTILINE;
	if (opt[InvertedGreediness]) ret |= PCRE2_UNGREEDY;
	if (opt[Extended]) ret |= PCRE2_EXTENDED;
	return ret;
}
// Compiles `pat` with options `opt`, replacing any previously compiled pattern.
// An empty pattern is rejected (returns false, no error message).
// On a compilation error `error_msg` and `error_offset` describe the problem.
// On success the capture group count and the named group tables are filled in.
// Returns true only when both the compiled code and its match data exist.
bool compile(PIString & pat, Options opt) {
	free();
	if (pat.isEmpty()) return false;
	int error_number = 0;
	compiled = pcre2_compile((PCRE2_SPTR) & (pat[0]), pat.size(), convertOptions(opt), &error_number, &error_offset, nullptr);
	if (!compiled) {
		const int buffer_len = 256;
		PIChar buffer[buffer_len];
		// The buffer length is expressed in code units, not bytes; passing sizeof(buffer)
		// would let PCRE2 write past the end of the array.
		const int sz = pcre2_get_error_message(error_number, (PCRE2_UCHAR16 *)buffer, buffer_len);
		// A negative value is an error code, not a length.
		if (sz > 0) error_msg = PIString(buffer, sz);
		return false;
	}
	match_data = pcre2_match_data_create_from_pattern(compiled, nullptr);
	uint32_t name_count = 0, name_entry_size = 0, cap_count = 0;
	PCRE2_SPTR name_table = nullptr;
	pcre2_pattern_info(compiled, PCRE2_INFO_CAPTURECOUNT, &cap_count);
	pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &name_count);
	pcre2_pattern_info(compiled, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
	pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &name_table);
	capture_count = cap_count;
	if (name_table && name_entry_size > 1) {
		PCRE2_SPTR entry = name_table;
		for (uint32_t i = 0; i < name_count; ++i, entry += name_entry_size) {
			// In the 16-bit library the first code unit of an entry is the group number,
			// the zero-terminated name follows it.
			const int gnum = entry[0];
			const PIString gname = getNEString(entry + 1, name_entry_size - 1);
			named_group_index[gname] = gnum;
			named_group_name[gnum] = gname;
		}
	}
	return isCompiled();
}
// Runs one match attempt for `ret`, starting at ret.start_offset.
// Resets ret.has_match / ret.is_error / ret.groups first, then:
//  - sets is_error when nothing is compiled, the matcher has no subject,
//    or PCRE2 reports an error other than "no match";
//  - leaves everything cleared when there is no match (including the case
//    where start_offset is already past the end of the subject);
//  - on a match fills ret.groups (group 0 is the whole match) and moves
//    ret.start_offset so that the following call continues after this match.
void match(Matcher & ret) {
	ret.has_match = ret.is_error = false;
	ret.groups.clear();
	if (!isCompiled() || !ret.subject) {
		ret.is_error = true;
		return;
	}
	const PCRE2_SIZE length = ret.subject->size();
	// Searching past the end simply finds nothing; it is not an error.
	if (ret.start_offset > length) return;
	// An empty subject may not own a buffer, so give PCRE2 a valid pointer.
	static const PCRE2_UCHAR16 empty_subject[1] = {0};
	PCRE2_SPTR text = length > 0 ? (PCRE2_SPTR)ret.subjectPtr() : empty_subject;
	const int rc = pcre2_match(compiled, text, length, ret.start_offset, PCRE2_NO_UTF_CHECK, match_data, nullptr);
	if (rc == PCRE2_ERROR_NOMATCH) return;
	if (rc < 0) {
		ret.is_error = true;
		return;
	}
	ret.has_match = true;
	const PCRE2_SIZE * ovector = pcre2_get_ovector_pointer(match_data);
	for (int i = 0; i < rc; i++) {
		Matcher::Group g;
		// Groups that did not participate are reported with both offsets "unset",
		// which ends up as index -1 and size 0 after the conversion to int.
		g.index = static_cast<int>(ovector[2 * i]);
		g.size = static_cast<int>(ovector[2 * i + 1] - ovector[2 * i]);
		ret.groups << g;
	}
	const PCRE2_SIZE match_end = ovector[1];
	PCRE2_SIZE next_offset = match_end;
	if (match_end <= ovector[0]) {
		// Empty match: restarting at the same offset would return it again forever,
		// so step over one character (two code units for a UTF-16 surrogate pair).
		++next_offset;
		const bool surrogate_pair =
			next_offset < length && (text[match_end] & 0xFC00) == 0xD800 && (text[next_offset] & 0xFC00) == 0xDC00;
		if (surrogate_pair) ++next_offset;
	}
	ret.start_offset = next_offset;
}
PRIVATE_DEFINITION_END(PIRegularExpression)
// Creates a regular expression and immediately compiles `pattern` with `opt`.
PIRegularExpression::PIRegularExpression(const PIString & pattern, Options opt): opt_(opt) {
	setPattern(pattern);
}
// Copies the pattern and options of `o` and compiles them again for this
// object; compiled PCRE2 state is not shared between the two objects.
PIRegularExpression::PIRegularExpression(const PIRegularExpression & o): opt_(o.opt_) {
	setPattern(o.pat_);
}
// Takes over the pattern and options of `o` and recompiles them for this object.
// Self-assignment is a no-op: the state is already identical, and skipping it
// avoids freeing and recompiling the pattern for nothing.
PIRegularExpression & PIRegularExpression::operator=(const PIRegularExpression & o) {
	if (this != &o) setPattern(o.pat_, o.opt_);
	return *this;
}
// Releases the compiled pattern and the match data held by the private part.
PIRegularExpression::~PIRegularExpression() {
PRIVATE->free();
}
// Stores `pattern` and compiles it with the currently set options.
// The compile result is not returned here; check isValid() afterwards.
void PIRegularExpression::setPattern(const PIString & pattern) {
pat_ = pattern;
PRIVATE->compile(pat_, opt_);
}
void PIRegularExpression::setPattern(const PIString & pattern, Options opt) {
opt_ = opt;
setPattern(pattern);
}
// True when a pattern has been compiled successfully and is ready for matching.
bool PIRegularExpression::isValid() const {
return PRIVATE->isCompiled();
}
// Message of the last pattern compilation error; empty when none was recorded.
PIString PIRegularExpression::errorString() const {
	const PIString & message = PRIVATE->error_msg;
	return message;
}
// Offset inside the pattern that PCRE2 reported for the last compilation
// error, narrowed to int.
int PIRegularExpression::errorPosition() const {
	return static_cast<int>(PRIVATE->error_offset);
}
// Number of capture groups in the compiled pattern; -1 when nothing is compiled.
int PIRegularExpression::captureGroupsCount() const {
return PRIVATE->capture_count;
}
// Names of all named capture groups of the compiled pattern.
PIStringList PIRegularExpression::captureGroupNames() const {
	PIStringList names = PRIVATE->named_group_name.values();
	return names;
}
// Name of the capture group with number `index`.
// NOTE(review): for a group without a name this presumably yields an empty
// string (default value of the map lookup) - confirm against PIMap::value().
PIString PIRegularExpression::captureGroupName(int index) const {
	const auto & name_by_index = PRIVATE->named_group_name;
	return name_by_index.value(index);
}
// Number of the capture group called `gname`.
// NOTE(review): an unknown name presumably yields 0 (default value of the map
// lookup), which is also the number of the whole match - confirm against PIMap::value().
int PIRegularExpression::captureGroupIndex(const PIString & gname) const {
	const auto & index_by_name = PRIVATE->named_group_index;
	return index_by_name.value(gname);
}
// Creates a matcher bound to the caller's string without running a match.
// Only the address of `subject` is stored, so the string has to stay alive
// for as long as the matcher is used.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(PIString & subject, size_t offset) {
	Matcher ret(this);
	ret.subject = &subject;
	ret.start_offset = offset;
	return ret;
}
// Creates a matcher that keeps its own copy of `subject`, without running a match.
// NOTE(review): `subject` is pointed at the matcher's own `subject_own` member,
// so the pointer stays valid after return only if the copy/move of the
// matcher is elided or re-targets it - confirm against Matcher's copy/move operations.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(const PIString & subject, size_t offset) {
	Matcher ret(this);
	ret.subject_own = subject;
	ret.subject = &ret.subject_own;
	ret.start_offset = offset;
	return ret;
}
// Creates a matcher bound to the caller's string and performs the first match
// attempt starting at `offset`.
PIRegularExpression::Matcher PIRegularExpression::match(PIString & subject, size_t offset) {
	Matcher ret(makeMatcher(subject, offset));
	PRIVATE->match(ret);
	return ret;
}
// Creates a matcher holding its own copy of `subject` and performs the first
// match attempt starting at `offset`.
PIRegularExpression::Matcher PIRegularExpression::match(const PIString & subject, size_t offset) {
	Matcher ret(makeMatcher(subject, offset));
	PRIVATE->match(ret);
	return ret;
}
// Creates a matcher that remembers the regular expression `p` it belongs to.
PIRegularExpression::Matcher::Matcher(PIRegularExpression * p): parent(p) {}
// Address of the first character of the subject string, or nullptr when the
// matcher has no subject.
PIChar * PIRegularExpression::Matcher::subjectPtr() const {
	return subject ? &(*subject)[0] : nullptr;
}
// True when the most recent match attempt found a match.
bool PIRegularExpression::Matcher::hasMatch() const {
return has_match;
}
bool PIRegularExpression::Matcher::next() {
parent->PRIVATEWB->match(*this);
return hasMatch();
}
// Texts of all groups of the current match, group 0 (the whole match) first.
// Returns an empty list when the matcher has no subject.
PIStringList PIRegularExpression::Matcher::matchedStrings() const {
	PIStringList result;
	if (subject) {
		for (int i = 0; i < groups.size_s(); ++i) {
			const Group & g = groups[i];
			result << subject->mid(g.index, g.size);
		}
	}
	return result;
}
// Text of group `index` of the current match (0 is the whole match).
// Returns an empty string for an out-of-range index or a missing subject.
PIString PIRegularExpression::Matcher::matchedString(int index) const {
	const bool in_range = index >= 0 && index < groups.size_s();
	if (!in_range || !subject) return {};
	const Group & g = groups[index];
	return subject->mid(g.index, g.size);
}
// Start offset of group `index` of the current match, -1 for an out-of-range index.
int PIRegularExpression::Matcher::matchedStart(int index) const {
	const bool in_range = index >= 0 && index < groups.size_s();
	return in_range ? groups[index].index : -1;
}
// Length of group `index` of the current match, -1 for an out-of-range index.
int PIRegularExpression::Matcher::matchedSize(int index) const {
	const bool in_range = index >= 0 && index < groups.size_s();
	return in_range ? groups[index].size : -1;
}
// Text of the named group `gname` of the current match.
// An unknown group name yields an empty string: the lookup falls back to -1,
// so it can no longer be mistaken for group 0 (the whole match).
PIString PIRegularExpression::Matcher::matchedString(const PIString & gname) const {
	const int index = parent->PRIVATEWB->named_group_index.value(gname, -1);
	return matchedString(index);
}
// Start offset of the named group `gname` of the current match.
// An unknown group name yields -1: the lookup falls back to -1, so it can
// no longer be mistaken for group 0 (the whole match).
int PIRegularExpression::Matcher::matchedStart(const PIString & gname) const {
	const int index = parent->PRIVATEWB->named_group_index.value(gname, -1);
	return matchedStart(index);
}
// Length of the named group `gname` of the current match.
// An unknown group name yields -1: the lookup falls back to -1, so it can
// no longer be mistaken for group 0 (the whole match).
int PIRegularExpression::Matcher::matchedSize(const PIString & gname) const {
	const int index = parent->PRIVATEWB->named_group_index.value(gname, -1);
	return matchedSize(index);
}

View File

@@ -0,0 +1,118 @@
/*! \file piregularexpression.h
* \ingroup Text
* \brief
* \~english Regular expression
* \~russian Регулярное выражение
*/
/*
PIP - Platform Independent Primitives
Regular expression
Ivan Pelipenko peri4ko@yandex.ru
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef piregularexpression_h
#define piregularexpression_h
#include <pistring.h>
#include <utility>
class PIP_EXPORT PIRegularExpression {
public:
enum Option {
None = 0x0,
CaseInsensitive = 0x01,
Singleline = 0x02,
Multiline = 0x04,
InvertedGreediness = 0x08,
Extended = 0x10
};
typedef PIFlags<Option> Options;
PIRegularExpression(const PIString & pattern = {}, Options opt = None);
PIRegularExpression(const PIRegularExpression & o);
PIRegularExpression & operator=(const PIRegularExpression & o);
~PIRegularExpression();
class PIP_EXPORT Matcher {
friend class PIRegularExpression;
public:
operator bool() const { return hasMatch(); }
bool hasMatch() const;
bool next();
PIStringList matchedStrings() const;
PIString matchedString(int index = 0) const;
int matchedStart(int index = 0) const;
int matchedSize(int index = 0) const;
PIString matchedString(const PIString & gname) const;
int matchedStart(const PIString & gname) const;
int matchedSize(const PIString & gname) const;
Matcher(Matcher &&) = default;
private:
Matcher(PIRegularExpression * p);
Matcher(const Matcher &) = default;
Matcher & operator=(const Matcher &) = default;
struct Group {
int index = 0;
int size = 0;
};
PIChar * subjectPtr() const;
bool has_match = false;
bool is_error = false;
PIVector<Group> groups;
PIRegularExpression * parent = nullptr;
PIString * subject = nullptr;
PIString subject_own;
size_t start_offset = 0;
};
PIString pattern() const { return pat_; }
Options options() const { return opt_; }
void setPattern(const PIString & pattern);
void setPattern(const PIString & pattern, Options opt);
bool isValid() const;
bool isNotValid() const { return !isValid(); }
PIString errorString() const;
int errorPosition() const;
int captureGroupsCount() const;
PIStringList captureGroupNames() const;
PIString captureGroupName(int index) const;
int captureGroupIndex(const PIString & gname) const;
Matcher match(const PIString & subject, size_t offset = 0);
Matcher match(PIString & subject, size_t offset = 0);
Matcher makeMatcher(const PIString & subject, size_t offset = 0);
Matcher makeMatcher(PIString & subject, size_t offset = 0);
private:
PRIVATE_DECLARATION(PIP_EXPORT)
PIString pat_, subj_own;
Options opt_;
};
#endif

View File

@@ -52,6 +52,7 @@
#define PITEXTMODULE_H
#include "piconstchars.h"
#include "piregularexpression.h"
#include "pistringlist.h"
#include "pitextstream.h"