add PIRegularExpression
This commit is contained in:
@@ -36,6 +36,7 @@
|
||||
#include "piserializationmodule.h"
|
||||
#include "pistatemachinemodule.h"
|
||||
#include "pisystemmodule.h"
|
||||
#include "pitextmodule.h"
|
||||
#include "pithreadmodule.h"
|
||||
|
||||
#endif // PIP_H
|
||||
|
||||
288
libs/main/text/piregularexpression.cpp
Normal file
288
libs/main/text/piregularexpression.cpp
Normal file
@@ -0,0 +1,288 @@
|
||||
#include "piregularexpression.h"
|
||||
|
||||
// clang-format off
|
||||
#define PCRE2_CODE_UNIT_WIDTH 16
|
||||
#include <pcre2.h>
|
||||
#include <pistringlist.h>
|
||||
// clang-format on
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIRegularExpression)
|
||||
pcre2_code * compiled = nullptr;
|
||||
PIString error_msg;
|
||||
PCRE2_SIZE error_offset = 0;
|
||||
|
||||
pcre2_match_data * match_data = nullptr;
|
||||
|
||||
int capture_count = -1;
|
||||
PIMap<PIString, int> named_group_index;
|
||||
PIMap<int, PIString> named_group_name;
|
||||
|
||||
bool isCompiled() const {
|
||||
return compiled && match_data;
|
||||
}
|
||||
void free() {
|
||||
named_group_index.clear();
|
||||
named_group_name.clear();
|
||||
error_msg.clear();
|
||||
error_offset = 0;
|
||||
capture_count = -1;
|
||||
if (match_data) {
|
||||
pcre2_match_data_free(match_data);
|
||||
match_data = nullptr;
|
||||
}
|
||||
if (compiled) {
|
||||
pcre2_code_free(compiled);
|
||||
compiled = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
PIString getNEString(const void * ptr, uint32_t max_size) {
|
||||
PIString ret;
|
||||
auto * cptr = (PIChar *)ptr;
|
||||
uint32_t sz = 0;
|
||||
while (*cptr != PIChar()) {
|
||||
ret.append(*cptr);
|
||||
cptr++;
|
||||
if (++sz > max_size) break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint32_t convertOptions(Options opt) {
|
||||
uint32_t ret = PCRE2_UTF | PCRE2_NO_UTF_CHECK;
|
||||
// clang-format off
|
||||
if (opt[CaseInsensitive ]) ret |= PCRE2_CASELESS ;
|
||||
if (opt[Singleline ]) ret |= PCRE2_FIRSTLINE;
|
||||
if (opt[Multiline ]) ret |= PCRE2_MULTILINE;
|
||||
if (opt[InvertedGreediness]) ret |= PCRE2_UNGREEDY ;
|
||||
if (opt[Extended ]) ret |= PCRE2_EXTENDED ;
|
||||
// clang-format on
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool compile(PIString & pat, Options opt) {
|
||||
free();
|
||||
if (pat.isEmpty()) return false;
|
||||
auto * pat_ptr = &(pat[0]);
|
||||
int error_number = 0;
|
||||
compiled = pcre2_compile((PCRE2_SPTR)pat_ptr, pat.size(), convertOptions(opt), &error_number, &error_offset, nullptr);
|
||||
if (!compiled) {
|
||||
PIChar buffer[256];
|
||||
int sz = pcre2_get_error_message(error_number, (PCRE2_UCHAR16 *)buffer, sizeof(buffer));
|
||||
error_msg = PIString(buffer, sz);
|
||||
return false;
|
||||
// printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer);
|
||||
}
|
||||
error_msg.clear();
|
||||
match_data = pcre2_match_data_create_from_pattern(compiled, nullptr);
|
||||
|
||||
uint32_t namecount = 0, name_entry_size = 0, cap_cout = 0;
|
||||
PCRE2_SPTR name_table = nullptr;
|
||||
pcre2_pattern_info(compiled, PCRE2_INFO_CAPTURECOUNT, &cap_cout);
|
||||
pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &namecount);
|
||||
pcre2_pattern_info(compiled, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
|
||||
pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &name_table);
|
||||
capture_count = cap_cout;
|
||||
auto tabptr = name_table;
|
||||
for (uint32_t i = 0; i < namecount; i++) {
|
||||
int gnum = *(ushort *)tabptr;
|
||||
PIString gname = getNEString(tabptr + 1, name_entry_size);
|
||||
named_group_index[gname] = gnum;
|
||||
named_group_name[gnum] = gname;
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
|
||||
return isCompiled();
|
||||
}
|
||||
|
||||
void match(Matcher & ret) {
|
||||
int rc = pcre2_match(compiled,
|
||||
(PCRE2_SPTR)ret.subjectPtr(),
|
||||
ret.subject->size(),
|
||||
ret.start_offset,
|
||||
PCRE2_NO_UTF_CHECK,
|
||||
match_data,
|
||||
nullptr);
|
||||
ret.has_match = ret.is_error = false;
|
||||
ret.groups.clear();
|
||||
if (rc == PCRE2_ERROR_NOMATCH) return;
|
||||
if (rc < 0) {
|
||||
ret.is_error = true;
|
||||
} else {
|
||||
ret.has_match = true;
|
||||
auto ovector = pcre2_get_ovector_pointer(match_data);
|
||||
for (int i = 0; i < rc; i++) {
|
||||
Matcher::Group g;
|
||||
g.index = ovector[2 * i];
|
||||
g.size = ovector[2 * i + 1] - ovector[2 * i];
|
||||
// g.string = PIString(&(sub_ptr[g.index]), g.size);
|
||||
ret.groups << g;
|
||||
}
|
||||
ret.start_offset = ovector[1];
|
||||
}
|
||||
}
|
||||
|
||||
PRIVATE_DEFINITION_END(PIRegularExpression)
|
||||
|
||||
|
||||
// Creates an expression from `pattern` and compiles it right away using options `opt`.
PIRegularExpression::PIRegularExpression(const PIString & pattern, Options opt) {
	opt_ = opt;
	setPattern(pattern);
}
|
||||
|
||||
|
||||
// Copy constructor: takes over the pattern text and options of `o` and compiles
// its own private PCRE2 state from them (compiled data is not shared).
PIRegularExpression::PIRegularExpression(const PIRegularExpression & o) {
	opt_ = o.opt_;
	setPattern(o.pat_);
}
|
||||
|
||||
|
||||
// Copy assignment: adopts the pattern and options of `o` and recompiles.
// Self-assignment is a no-op, so the already compiled state is not thrown away
// and rebuilt for nothing.
PIRegularExpression & PIRegularExpression::operator=(const PIRegularExpression & o) {
	if (this != &o) {
		setPattern(o.pat_, o.opt_);
	}
	return *this;
}
|
||||
|
||||
|
||||
// Releases the compiled pattern and the match data held by the private part.
PIRegularExpression::~PIRegularExpression() {
	PRIVATE->free();
}
|
||||
|
||||
|
||||
// Stores `pattern` and compiles it with the currently set options.
// The result of the compilation is observable through isValid() / errorString().
void PIRegularExpression::setPattern(const PIString & pattern) {
	pat_ = pattern;
	PRIVATE->compile(pat_, opt_);
}
|
||||
|
||||
|
||||
void PIRegularExpression::setPattern(const PIString & pattern, Options opt) {
|
||||
opt_ = opt;
|
||||
setPattern(pattern);
|
||||
}
|
||||
|
||||
|
||||
bool PIRegularExpression::isValid() const {
|
||||
return PRIVATE->isCompiled();
|
||||
}
|
||||
|
||||
|
||||
// Returns the message recorded by the last failed compilation
// (empty when no error message is stored).
PIString PIRegularExpression::errorString() const {
	PIString message = PRIVATE->error_msg;
	return message;
}
|
||||
|
||||
|
||||
int PIRegularExpression::errorPosition() const {
|
||||
return PRIVATE->error_offset;
|
||||
}
|
||||
|
||||
|
||||
int PIRegularExpression::captureGroupsCount() const {
|
||||
return PRIVATE->capture_count;
|
||||
}
|
||||
|
||||
|
||||
// Returns the names of all named capture groups of the compiled pattern.
PIStringList PIRegularExpression::captureGroupNames() const {
	PIStringList names = PRIVATE->named_group_name.values();
	return names;
}
|
||||
|
||||
|
||||
// Returns the name of the capture group with number `index`,
// or the map's default value when that group has no name.
PIString PIRegularExpression::captureGroupName(int index) const {
	PIString name = PRIVATE->named_group_name.value(index);
	return name;
}
|
||||
|
||||
|
||||
int PIRegularExpression::captureGroupIndex(const PIString & gname) const {
|
||||
return PRIVATE->named_group_index.value(gname);
|
||||
}
|
||||
|
||||
|
||||
// Creates a matcher over the caller's string without running a match yet.
// Only a pointer to `subject` is stored, so `subject` must stay alive (and unchanged)
// for as long as the matcher is used. Matching will start at `offset`.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(PIString & subject, size_t offset) {
	Matcher matcher(this);
	matcher.subject = &subject;
	matcher.start_offset = offset;
	return matcher;
}
|
||||
|
||||
|
||||
// Creates a matcher that keeps its own copy of `subject`, without running a match yet.
// The matcher's subject pointer is aimed at that internal copy. Matching will start at `offset`.
PIRegularExpression::Matcher PIRegularExpression::makeMatcher(const PIString & subject, size_t offset) {
	Matcher matcher(this);
	matcher.subject_own = subject;
	matcher.subject = &matcher.subject_own;
	matcher.start_offset = offset;
	return matcher;
}
|
||||
|
||||
|
||||
// Builds a matcher over the caller's string (referenced, not copied) and performs
// the first match attempt starting at `offset`.
PIRegularExpression::Matcher PIRegularExpression::match(PIString & subject, size_t offset) {
	Matcher result = makeMatcher(subject, offset);
	PRIVATE->match(result);
	return result;
}
|
||||
|
||||
|
||||
// Builds a matcher holding its own copy of `subject` and performs the first match
// attempt starting at `offset`.
PIRegularExpression::Matcher PIRegularExpression::match(const PIString & subject, size_t offset) {
	Matcher result = makeMatcher(subject, offset);
	PRIVATE->match(result);
	return result;
}
|
||||
|
||||
|
||||
// Binds the matcher to the expression `p` that created it; all other fields keep
// their in-class defaults.
PIRegularExpression::Matcher::Matcher(PIRegularExpression * p) {
	parent = p;
}
|
||||
|
||||
|
||||
// Returns a pointer to the first character of the subject string, or nullptr when
// there is no subject or the subject is empty (element 0 of an empty string must
// not be indexed).
PIChar * PIRegularExpression::Matcher::subjectPtr() const {
	if (!subject || subject->isEmpty()) return nullptr;
	return &(*subject)[0];
}
|
||||
|
||||
|
||||
bool PIRegularExpression::Matcher::hasMatch() const {
|
||||
return has_match;
|
||||
}
|
||||
|
||||
|
||||
bool PIRegularExpression::Matcher::next() {
|
||||
parent->PRIVATEWB->match(*this);
|
||||
return hasMatch();
|
||||
}
|
||||
|
||||
|
||||
// Returns the text of every group recorded by the last match, in group order
// (entry 0 is the whole match). Empty list when there is no subject.
PIStringList PIRegularExpression::Matcher::matchedStrings() const {
	PIStringList ret;
	if (subject) {
		for (const auto & g: groups) {
			ret << subject->mid(g.index, g.size);
		}
	}
	return ret;
}
|
||||
|
||||
|
||||
// Returns the text captured by group `index` in the last match.
// An empty string is returned when `index` is out of range or there is no subject.
PIString PIRegularExpression::Matcher::matchedString(int index) const {
	const bool in_range = index >= 0 && index < groups.size_s();
	if (!in_range || !subject) return {};
	const Group & g = groups[index];
	return subject->mid(g.index, g.size);
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedStart(int index) const {
|
||||
if (index < 0 || index >= groups.size_s()) return -1;
|
||||
return groups[index].index;
|
||||
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedSize(int index) const {
|
||||
if (index < 0 || index >= groups.size_s()) return -1;
|
||||
return groups[index].size;
|
||||
}
|
||||
|
||||
|
||||
// Returns the text captured by the group named `gname`.
// An unknown name resolves to index -1, so an empty string is returned instead of
// silently falling back to group 0 (the whole match).
PIString PIRegularExpression::Matcher::matchedString(const PIString & gname) const {
	return matchedString(parent->PRIVATEWB->named_group_index.value(gname, -1));
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedStart(const PIString & gname) const {
|
||||
return matchedStart(parent->PRIVATEWB->named_group_index.value(gname));
|
||||
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedSize(const PIString & gname) const {
|
||||
return matchedSize(parent->PRIVATEWB->named_group_index.value(gname));
|
||||
}
|
||||
118
libs/main/text/piregularexpression.h
Normal file
118
libs/main/text/piregularexpression.h
Normal file
@@ -0,0 +1,118 @@
|
||||
/*! \file piregularexpression.h
|
||||
* \ingroup Text
|
||||
* \brief
|
||||
* \~english Regular expression
|
||||
* \~russian Регулярное выражение
|
||||
*/
|
||||
/*
|
||||
PIP - Platform Independent Primitives
|
||||
Regular expression
|
||||
Ivan Pelipenko peri4ko@yandex.ru
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef piregularexpression_h
|
||||
#define piregularexpression_h
|
||||
|
||||
#include <pistring.h>
#include <utility>
|
||||
|
||||
class PIP_EXPORT PIRegularExpression {
|
||||
public:
|
||||
enum Option {
|
||||
None = 0x0,
|
||||
CaseInsensitive = 0x01,
|
||||
Singleline = 0x02,
|
||||
Multiline = 0x04,
|
||||
InvertedGreediness = 0x08,
|
||||
Extended = 0x10
|
||||
};
|
||||
typedef PIFlags<Option> Options;
|
||||
|
||||
PIRegularExpression(const PIString & pattern = {}, Options opt = None);
|
||||
PIRegularExpression(const PIRegularExpression & o);
|
||||
PIRegularExpression & operator=(const PIRegularExpression & o);
|
||||
~PIRegularExpression();
|
||||
|
||||
class PIP_EXPORT Matcher {
|
||||
friend class PIRegularExpression;
|
||||
|
||||
public:
|
||||
operator bool() const { return hasMatch(); }
|
||||
bool hasMatch() const;
|
||||
|
||||
bool next();
|
||||
|
||||
PIStringList matchedStrings() const;
|
||||
|
||||
PIString matchedString(int index = 0) const;
|
||||
int matchedStart(int index = 0) const;
|
||||
int matchedSize(int index = 0) const;
|
||||
|
||||
PIString matchedString(const PIString & gname) const;
|
||||
int matchedStart(const PIString & gname) const;
|
||||
int matchedSize(const PIString & gname) const;
|
||||
|
||||
Matcher(Matcher &&) = default;
|
||||
|
||||
private:
|
||||
Matcher(PIRegularExpression * p);
|
||||
Matcher(const Matcher &) = default;
|
||||
Matcher & operator=(const Matcher &) = default;
|
||||
|
||||
struct Group {
|
||||
int index = 0;
|
||||
int size = 0;
|
||||
};
|
||||
|
||||
PIChar * subjectPtr() const;
|
||||
|
||||
bool has_match = false;
|
||||
bool is_error = false;
|
||||
PIVector<Group> groups;
|
||||
PIRegularExpression * parent = nullptr;
|
||||
PIString * subject = nullptr;
|
||||
PIString subject_own;
|
||||
size_t start_offset = 0;
|
||||
};
|
||||
|
||||
PIString pattern() const { return pat_; }
|
||||
Options options() const { return opt_; }
|
||||
|
||||
void setPattern(const PIString & pattern);
|
||||
void setPattern(const PIString & pattern, Options opt);
|
||||
|
||||
bool isValid() const;
|
||||
bool isNotValid() const { return !isValid(); }
|
||||
PIString errorString() const;
|
||||
int errorPosition() const;
|
||||
|
||||
int captureGroupsCount() const;
|
||||
PIStringList captureGroupNames() const;
|
||||
PIString captureGroupName(int index) const;
|
||||
int captureGroupIndex(const PIString & gname) const;
|
||||
|
||||
Matcher match(const PIString & subject, size_t offset = 0);
|
||||
Matcher match(PIString & subject, size_t offset = 0);
|
||||
|
||||
Matcher makeMatcher(const PIString & subject, size_t offset = 0);
|
||||
Matcher makeMatcher(PIString & subject, size_t offset = 0);
|
||||
|
||||
private:
|
||||
PRIVATE_DECLARATION(PIP_EXPORT)
|
||||
PIString pat_, subj_own;
|
||||
Options opt_;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -52,6 +52,7 @@
|
||||
#define PITEXTMODULE_H
|
||||
|
||||
#include "piconstchars.h"
|
||||
#include "piregularexpression.h"
|
||||
#include "pistringlist.h"
|
||||
#include "pitextstream.h"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user