Merge pull request 'regexp' (#187) from regexp into master
Reviewed-on: #187
This commit was merged in pull request #187.
This commit is contained in:
@@ -36,6 +36,7 @@
|
||||
#include "piserializationmodule.h"
|
||||
#include "pistatemachinemodule.h"
|
||||
#include "pisystemmodule.h"
|
||||
#include "pitextmodule.h"
|
||||
#include "pithreadmodule.h"
|
||||
|
||||
#endif // PIP_H
|
||||
|
||||
@@ -23,8 +23,8 @@
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef pijsonserialization_H
|
||||
#define pijsonserialization_H
|
||||
#ifndef PIJSONSERIALIZATION_H
|
||||
#define PIJSONSERIALIZATION_H
|
||||
|
||||
#include "pijson.h"
|
||||
|
||||
@@ -52,7 +52,8 @@ template<typename T,
|
||||
typename std::enable_if<!std::is_enum<T>::value, int>::type = 0,
|
||||
typename std::enable_if<!std::is_arithmetic<T>::value, int>::type = 0>
|
||||
inline PIJSON piSerializeJSON(const T & v) {
|
||||
static_assert(false, "[piSerializeJSON] Error: using undeclared piSerializeJSON() for complex type!");
|
||||
static_assert(std::is_enum<T>::value || std::is_arithmetic<T>::value,
|
||||
"[piSerializeJSON] Error: using undeclared piSerializeJSON() for complex type!");
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -190,8 +191,8 @@ inline PIJSON piSerializeJSON(const PIDeque<T> & v) {
|
||||
template<typename T>
|
||||
inline PIJSON piSerializeJSON(const PIVector2D<T> & v) {
|
||||
PIJSON ret;
|
||||
ret["cols"] = v.cols();
|
||||
ret["rows"] = v.rows();
|
||||
ret["cols"] = static_cast<uint>(v.cols());
|
||||
ret["rows"] = static_cast<uint>(v.rows());
|
||||
ret["mat"] = piSerializeJSON(v.plainVector());
|
||||
return ret;
|
||||
}
|
||||
@@ -239,7 +240,8 @@ template<typename T,
|
||||
typename std::enable_if<!std::is_enum<T>::value, int>::type = 0,
|
||||
typename std::enable_if<!std::is_arithmetic<T>::value, int>::type = 0>
|
||||
inline void piDeserializeJSON(T & v, const PIJSON & js) {
|
||||
static_assert(false, "[piDeserializeJSON] Error: using undeclared piDeserializeJSON() for complex type!");
|
||||
static_assert(std::is_enum<T>::value || std::is_arithmetic<T>::value,
|
||||
"[piDeserializeJSON] Error: using undeclared piDeserializeJSON() for complex type!");
|
||||
v = {};
|
||||
}
|
||||
|
||||
@@ -253,10 +255,9 @@ inline void piDeserializeJSON(PIVariant & v, const PIJSON & js) {
|
||||
|
||||
template<typename T>
|
||||
inline void piDeserializeJSON(complex<T> & v, const PIJSON & js) {
|
||||
T c[2];
|
||||
piDeserializeJSON(c[0], js[0]);
|
||||
piDeserializeJSON(c[1], js[1]);
|
||||
v = complex<T>(c[0], c[1]);
|
||||
if (!js.isArray()) return;
|
||||
piDeserializeJSON(reinterpret_cast<T(&)[2]>(v)[0], js[0]);
|
||||
piDeserializeJSON(reinterpret_cast<T(&)[2]>(v)[1], js[1]);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
@@ -364,10 +365,10 @@ template<typename T>
|
||||
inline void piDeserializeJSON(PIVector2D<T> & v, const PIJSON & js) {
|
||||
v.clear();
|
||||
if (!js.isObject()) return;
|
||||
v.resize(js["rows"].toInt(), js["cols"].toInt());
|
||||
const auto & mat(js["mat"]);
|
||||
if (!mat.isArray()) return;
|
||||
piDeserializeJSON(v.plainVector(), mat);
|
||||
v.resize(js["rows"].toInt(), js["cols"].toInt());
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
@@ -410,4 +411,4 @@ T PIJSON::deserialize(const PIJSON & json) {
|
||||
}
|
||||
|
||||
|
||||
#endif // pijsonserialization_h
|
||||
#endif // PIJSONSERIALIZATION_H
|
||||
|
||||
337
libs/main/text/piregularexpression.cpp
Normal file
337
libs/main/text/piregularexpression.cpp
Normal file
@@ -0,0 +1,337 @@
|
||||
#include "piregularexpression.h"
|
||||
|
||||
// clang-format off
|
||||
#define PCRE2_CODE_UNIT_WIDTH 16
|
||||
#include <pcre2.h>
|
||||
#include <pistringlist.h>
|
||||
// clang-format on
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIRegularExpression)
|
||||
pcre2_code * compiled = nullptr;
|
||||
PIString error_msg;
|
||||
PCRE2_SIZE error_offset = 0;
|
||||
|
||||
pcre2_match_data * match_data = nullptr;
|
||||
|
||||
int capture_count = -1;
|
||||
PIMap<PIString, int> named_group_index;
|
||||
PIMap<int, PIString> named_group_name;
|
||||
|
||||
bool isCompiled() const {
|
||||
return compiled && match_data;
|
||||
}
|
||||
void free() {
|
||||
named_group_index.clear();
|
||||
named_group_name.clear();
|
||||
error_msg.clear();
|
||||
error_offset = 0;
|
||||
capture_count = -1;
|
||||
if (match_data) {
|
||||
pcre2_match_data_free(match_data);
|
||||
match_data = nullptr;
|
||||
}
|
||||
if (compiled) {
|
||||
pcre2_code_free(compiled);
|
||||
compiled = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
//! Copies a zero-terminated run of PIChar units into a PIString.
//! \param ptr first unit of the run; a null pointer yields an empty string
//! \param max_size upper bound on the number of units that may be read
//! \return the characters preceding the terminator, never more than \a max_size of them
PIString getNEString(const void * ptr, uint32_t max_size) {
	PIString ret;
	const auto * cptr = static_cast<const PIChar *>(ptr);
	if (!cptr) return ret;
	// The bound is tested before every read, so no unit beyond max_size is touched
	// (the previous loop read and appended first, allowing max_size + 1 units).
	for (uint32_t i = 0; i < max_size && cptr[i] != PIChar(); ++i) {
		ret.append(cptr[i]);
	}
	return ret;
}
|
||||
|
||||
//! Translates PIRegularExpression::Options into PCRE2 compile-option bits.
//! PCRE2_UTF and PCRE2_NO_UTF_CHECK are always set.
uint32_t convertOptions(Options opt) {
	struct Mapping {
		Option flag;
		uint32_t pcre;
	};
	// NOTE(review): Singleline is mapped to PCRE2_FIRSTLINE. If "single line" was
	// meant as "dot also matches newline", PCRE2_DOTALL would be the flag — confirm.
	static const Mapping table[] = {
		{CaseInsensitive, PCRE2_CASELESS},
		{Singleline, PCRE2_FIRSTLINE},
		{Multiline, PCRE2_MULTILINE},
		{InvertedGreediness, PCRE2_UNGREEDY},
		{Extended, PCRE2_EXTENDED},
	};
	uint32_t ret = PCRE2_UTF | PCRE2_NO_UTF_CHECK;
	for (const auto & m: table) {
		if (opt[m.flag]) ret |= m.pcre;
	}
	return ret;
}
|
||||
|
||||
//! Compiles \a pat with options \a opt, replacing any previously compiled pattern.
//! \details On success the match-data block is created and the capture count and
//! named-group tables are filled. On failure \a error_msg and \a error_offset
//! describe the problem. An empty pattern is not compiled.
//! \return true when the pattern is compiled and ready for matching
bool compile(PIString & pat, Options opt) {
	free();
	if (pat.isEmpty()) return false;
	int error_number = 0;
	compiled = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(&pat.front()), pat.size(), convertOptions(opt), &error_number, &error_offset, nullptr);
	if (!compiled) {
		// pcre2_get_error_message() takes the buffer length in code units, not bytes
		constexpr PCRE2_SIZE buffer_len = 256;
		PIChar buffer[buffer_len];
		const int sz = pcre2_get_error_message(error_number, reinterpret_cast<PCRE2_UCHAR16 *>(buffer), buffer_len);
		if (sz > 0) {
			error_msg = PIString(buffer, sz);
		} else {
			// A negative result is itself an error code (unknown number or truncated text)
			error_msg = PIString("PCRE2 error ") + PIString::fromNumber(error_number);
		}
		return false;
	}
	match_data = pcre2_match_data_create_from_pattern(compiled, nullptr);
	if (!match_data) {
		free();
		error_msg = PIString("PCRE2: can't allocate match data");
		return false;
	}

	uint32_t cap_count = 0, name_count = 0, name_entry_size = 0;
	PCRE2_SPTR name_table = nullptr;
	pcre2_pattern_info(compiled, PCRE2_INFO_CAPTURECOUNT, &cap_count);
	pcre2_pattern_info(compiled, PCRE2_INFO_NAMECOUNT, &name_count);
	pcre2_pattern_info(compiled, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
	pcre2_pattern_info(compiled, PCRE2_INFO_NAMETABLE, &name_table);
	capture_count = static_cast<int>(cap_count);

	// With 16-bit code units every table entry is: one unit holding the group
	// number, followed by the zero-terminated group name.
	if (name_table && name_entry_size > 1) {
		PCRE2_SPTR entry = name_table;
		for (uint32_t i = 0; i < name_count; ++i, entry += name_entry_size) {
			const int gnum = entry[0];
			const PIString gname = getNEString(entry + 1, name_entry_size - 1);
			named_group_index[gname] = gnum;
			named_group_name[gnum] = gname;
		}
	}
	return isCompiled();
}
|
||||
|
||||
//! Runs one match attempt on the matcher's subject, starting at ret.start_offset.
//! \details Resets the matcher state, then on success stores one Group (start and
//! length in code units) per returned capture and moves ret.start_offset to the
//! position where the next attempt should begin. A missing pattern, a missing
//! subject or any PCRE2 error other than "no match" sets ret.is_error.
void match(Matcher & ret) {
	ret.has_match = ret.is_error = false;
	ret.groups.clear();
	if (!isCompiled() || !ret.subject) {
		ret.is_error = true;
		return;
	}
	// An empty subject is matched against a private terminator so that front()
	// is never taken from an empty string.
	static const PCRE2_UCHAR empty_subject[1] = {0};
	const PCRE2_SIZE subject_size = ret.subject->size();
	PCRE2_SPTR subject_ptr = subject_size > 0 ? reinterpret_cast<PCRE2_SPTR>(ret.subjectPtr()) : empty_subject;

	const int rc = pcre2_match(compiled, subject_ptr, subject_size, ret.start_offset, PCRE2_NO_UTF_CHECK, match_data, nullptr);
	if (rc == PCRE2_ERROR_NOMATCH) return;
	if (rc < 0) {
		ret.is_error = true;
		return;
	}
	ret.has_match = true;
	const PCRE2_SIZE * ovector = pcre2_get_ovector_pointer(match_data);
	for (int i = 0; i < rc; ++i) {
		Matcher::Group g;
		g.index = static_cast<int>(ovector[2 * i]);
		g.size = static_cast<int>(ovector[2 * i + 1] - ovector[2 * i]);
		ret.groups << g;
	}

	PCRE2_SIZE next_offset = ovector[1];
	if (ovector[0] == ovector[1]) {
		// Empty match: restarting at the same offset would return it again forever,
		// so step over one character, keeping a UTF-16 surrogate pair together.
		// Past the end of the subject the next attempt simply reports no match.
		const bool surrogate_pair = next_offset + 1 < subject_size
		                            && (subject_ptr[next_offset] & 0xFC00u) == 0xD800u
		                            && (subject_ptr[next_offset + 1] & 0xFC00u) == 0xDC00u;
		next_offset += surrogate_pair ? 2 : 1;
	}
	ret.start_offset = next_offset;
}
|
||||
|
||||
PRIVATE_DEFINITION_END(PIRegularExpression)
|
||||
|
||||
|
||||
//! Stores \a pattern and \a opt and compiles the pattern right away.
PIRegularExpression::PIRegularExpression(const PIString & pattern, Options opt) {
	opt_ = opt;
	pat_ = pattern;
	PRIVATE->compile(pat_, opt_);
}
|
||||
|
||||
|
||||
//! Copy constructor: takes pattern text and options of \a o and compiles them anew
//! (the compiled PCRE2 objects themselves are not shared).
PIRegularExpression::PIRegularExpression(const PIRegularExpression & o) {
	const PIString & source_pattern = o.pat_;
	const Options source_options = o.opt_;
	setPattern(source_pattern, source_options);
}
|
||||
|
||||
|
||||
//! Copy assignment: adopts pattern text and options of \a o and recompiles.
PIRegularExpression & PIRegularExpression::operator=(const PIRegularExpression & o) {
	PIRegularExpression & self = *this;
	self.setPattern(o.pat_, o.opt_);
	return self;
}
|
||||
|
||||
|
||||
//! Destructor: releases the compiled pattern and the match-data block.
PIRegularExpression::~PIRegularExpression() {
	PRIVATE->free();
}
|
||||
|
||||
|
||||
void PIRegularExpression::setPattern(const PIString & pattern) {
|
||||
pat_ = pattern;
|
||||
PRIVATE->compile(pat_, opt_);
|
||||
}
|
||||
|
||||
|
||||
void PIRegularExpression::setPattern(const PIString & pattern, Options opt) {
|
||||
opt_ = opt;
|
||||
setPattern(pattern);
|
||||
}
|
||||
|
||||
|
||||
bool PIRegularExpression::isValid() const {
|
||||
return PRIVATE->isCompiled();
|
||||
}
|
||||
|
||||
|
||||
//! Returns the message recorded by the last compilation; it is cleared at the start
//! of every compilation and therefore empty after a successful one.
PIString PIRegularExpression::errorString() const {
	const PIString & message = PRIVATE->error_msg;
	return message;
}
|
||||
|
||||
|
||||
int PIRegularExpression::errorPosition() const {
|
||||
return PRIVATE->error_offset;
|
||||
}
|
||||
|
||||
|
||||
int PIRegularExpression::captureGroupsCount() const {
|
||||
return PRIVATE->capture_count;
|
||||
}
|
||||
|
||||
|
||||
//! Returns the names of all named capture groups of the compiled pattern.
PIStringList PIRegularExpression::captureGroupNames() const {
	const auto & names_by_index = PRIVATE->named_group_name;
	return names_by_index.values();
}
|
||||
|
||||
|
||||
//! Returns the name registered for capture group \a index; groups without a name
//! yield the map's default value.
PIString PIRegularExpression::captureGroupName(int index) const {
	const auto & names_by_index = PRIVATE->named_group_name;
	return names_by_index.value(index);
}
|
||||
|
||||
|
||||
int PIRegularExpression::captureGroupIndex(const PIString & gname) const {
|
||||
return PRIVATE->named_group_index.value(gname);
|
||||
}
|
||||
|
||||
|
||||
//! Creates a matcher over the caller's string without performing a match.
//! Only a pointer to \a subject is kept, so the string must outlive the matcher.
PIRegularExpression::Matcher PIRegularExpression::matchIterator(PIString & subject, size_t offset) {
	PIRegularExpression::Matcher matcher(this);
	matcher.subject = &subject;
	matcher.start_offset = offset;
	return matcher;
}
|
||||
|
||||
|
||||
//! Creates a matcher that takes ownership of a temporary subject string
//! without performing a match.
PIRegularExpression::Matcher PIRegularExpression::matchIterator(PIString && subject, size_t offset) {
	PIRegularExpression::Matcher matcher(this);
	matcher.subject_own = std::move(subject);
	// The matcher reads from its own storage
	matcher.subject = &matcher.subject_own;
	matcher.start_offset = offset;
	return matcher;
}
|
||||
|
||||
|
||||
//! Creates a matcher working on a private copy of \a subject without performing a match.
PIRegularExpression::Matcher PIRegularExpression::matchIterator(const PIString & subject, size_t offset) {
	PIRegularExpression::Matcher matcher(this);
	matcher.subject_own = subject;
	// The matcher reads from its own copy
	matcher.subject = &matcher.subject_own;
	matcher.start_offset = offset;
	return matcher;
}
|
||||
|
||||
|
||||
//! Performs the first match on the caller's string, starting at \a offset.
//! Only a pointer to \a subject is kept, so the string must outlive the result.
PIRegularExpression::Matcher PIRegularExpression::match(PIString & subject, size_t offset) {
	PIRegularExpression::Matcher result = matchIterator(subject, offset);
	PRIVATE->match(result);
	return result;
}
|
||||
|
||||
|
||||
//! Performs the first match on a temporary subject, which the result takes ownership of.
PIRegularExpression::Matcher PIRegularExpression::match(PIString && subject, size_t offset) {
	PIRegularExpression::Matcher result = matchIterator(std::move(subject), offset);
	PRIVATE->match(result);
	return result;
}
|
||||
|
||||
|
||||
//! Performs the first match on a private copy of \a subject, starting at \a offset.
PIRegularExpression::Matcher PIRegularExpression::match(const PIString & subject, size_t offset) {
	PIRegularExpression::Matcher result = matchIterator(subject, offset);
	PRIVATE->match(result);
	return result;
}
|
||||
|
||||
|
||||
//! Creates an empty matcher bound to the expression \a p that will execute its matches.
PIRegularExpression::Matcher::Matcher(PIRegularExpression * p): parent(p) {}
|
||||
|
||||
|
||||
//! Returns a pointer to the first character of the subject, or nullptr when no
//! subject is attached.
PIChar * PIRegularExpression::Matcher::subjectPtr() const {
	return subject ? &subject->front() : nullptr;
}
|
||||
|
||||
|
||||
bool PIRegularExpression::Matcher::hasMatch() const {
|
||||
return has_match;
|
||||
}
|
||||
|
||||
|
||||
bool PIRegularExpression::Matcher::next() {
|
||||
parent->PRIVATEWB->match(*this);
|
||||
return hasMatch();
|
||||
}
|
||||
|
||||
|
||||
//! Returns the text of every recorded group of the current match, in group order
//! (index 0 is the whole match). Empty when no subject is attached.
PIStringList PIRegularExpression::Matcher::matchedStrings() const {
	PIStringList pieces;
	if (subject) {
		for (int i = 0; i < groups.size_s(); ++i) {
			pieces << subject->mid(groups[i].index, groups[i].size);
		}
	}
	return pieces;
}
|
||||
|
||||
|
||||
//! Returns the text captured by group \a index of the current match.
//! An out-of-range index or a missing subject yields an empty string.
PIString PIRegularExpression::Matcher::matchedString(int index) const {
	const bool in_range = index >= 0 && index < groups.size_s();
	if (!in_range || !subject) return {};
	const Group & g = groups[index];
	return subject->mid(g.index, g.size);
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedStart(int index) const {
|
||||
if (index < 0 || index >= groups.size_s()) return -1;
|
||||
return groups[index].index;
|
||||
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedSize(int index) const {
|
||||
if (index < 0 || index >= groups.size_s()) return -1;
|
||||
return groups[index].size;
|
||||
}
|
||||
|
||||
|
||||
//! Returns the text captured by the group named \a gname.
//! An unknown name resolves to index -1 and therefore yields an empty string
//! (previously it resolved to 0 and returned the whole match).
PIString PIRegularExpression::Matcher::matchedString(const PIString & gname) const {
	return matchedString(parent->PRIVATEWB->named_group_index.value(gname, -1));
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedStart(const PIString & gname) const {
|
||||
return matchedStart(parent->PRIVATEWB->named_group_index.value(gname));
|
||||
}
|
||||
|
||||
|
||||
int PIRegularExpression::Matcher::matchedSize(const PIString & gname) const {
|
||||
return matchedSize(parent->PRIVATEWB->named_group_index.value(gname));
|
||||
}
|
||||
|
||||
|
||||
//! Builds an expression from a glob pattern, converted with PCRE2_CONVERT_GLOB.
//! If the conversion fails the returned object holds no pattern.
PIRegularExpression PIRegularExpression::fromGlob(const PIString & pattern, Options opt) {
	PIRegularExpression converted;
	converted.convertFrom(pattern, PCRE2_CONVERT_GLOB, opt);
	return converted;
}
|
||||
|
||||
|
||||
//! Builds an expression from a POSIX pattern, converted with PCRE2_CONVERT_POSIX_BASIC.
//! If the conversion fails the returned object holds no pattern.
PIRegularExpression PIRegularExpression::fromPOSIX(const PIString & pattern, Options opt) {
	PIRegularExpression converted;
	converted.convertFrom(pattern, PCRE2_CONVERT_POSIX_BASIC, opt);
	return converted;
}
|
||||
|
||||
|
||||
void PIRegularExpression::convertFrom(const PIString & pattern, uint type, Options opt) {
|
||||
if (pattern.isEmpty()) return;
|
||||
const auto cptr = &const_cast<PIString &>(pattern).front();
|
||||
PCRE2_UCHAR * out = nullptr;
|
||||
PCRE2_SIZE out_size = 0;
|
||||
const int rc = pcre2_pattern_convert((PCRE2_SPTR)cptr,
|
||||
pattern.size_s(),
|
||||
type | PCRE2_CONVERT_UTF | PCRE2_CONVERT_NO_UTF_CHECK,
|
||||
&out,
|
||||
&out_size,
|
||||
nullptr);
|
||||
if (rc != 0) {
|
||||
piCout << "PIRegularExpression::convertFrom error" << rc;
|
||||
} else {
|
||||
setPattern(PIString(reinterpret_cast<PIChar *>(out), out_size), opt);
|
||||
}
|
||||
pcre2_converted_pattern_free(out);
|
||||
}
|
||||
125
libs/main/text/piregularexpression.h
Normal file
125
libs/main/text/piregularexpression.h
Normal file
@@ -0,0 +1,125 @@
|
||||
/*! \file piregularexpression.h
|
||||
* \ingroup Text
|
||||
* \brief
|
||||
* \~english Regular expression
|
||||
* \~russian Регулярное выражение
|
||||
*/
|
||||
/*
|
||||
PIP - Platform Independent Primitives
|
||||
Regular expression
|
||||
Ivan Pelipenko peri4ko@yandex.ru
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef PIREGULAREXPRESSION_H
|
||||
#define PIREGULAREXPRESSION_H
|
||||
|
||||
#include "pistring.h"

#include <utility>
|
||||
|
||||
class PIP_EXPORT PIRegularExpression {
|
||||
public:
|
||||
enum Option {
|
||||
None = 0x0,
|
||||
CaseInsensitive = 0x01,
|
||||
Singleline = 0x02,
|
||||
Multiline = 0x04,
|
||||
InvertedGreediness = 0x08,
|
||||
Extended = 0x10
|
||||
};
|
||||
typedef PIFlags<Option> Options;
|
||||
|
||||
PIRegularExpression(const PIString & pattern = {}, Options opt = None);
|
||||
PIRegularExpression(const PIRegularExpression & o);
|
||||
PIRegularExpression & operator=(const PIRegularExpression & o);
|
||||
~PIRegularExpression();
|
||||
|
||||
class PIP_EXPORT Matcher {
|
||||
friend class PIRegularExpression;
|
||||
|
||||
public:
|
||||
operator bool() const { return hasMatch(); }
|
||||
bool hasMatch() const;
|
||||
|
||||
bool next();
|
||||
|
||||
PIStringList matchedStrings() const;
|
||||
|
||||
PIString matchedString(int index = 0) const;
|
||||
int matchedStart(int index = 0) const;
|
||||
int matchedSize(int index = 0) const;
|
||||
|
||||
PIString matchedString(const PIString & gname) const;
|
||||
int matchedStart(const PIString & gname) const;
|
||||
int matchedSize(const PIString & gname) const;
|
||||
|
||||
Matcher(Matcher &&) = default;
|
||||
|
||||
private:
|
||||
Matcher(PIRegularExpression * p);
|
||||
Matcher(const Matcher &) = default;
|
||||
Matcher & operator=(const Matcher &) = default;
|
||||
|
||||
struct Group {
|
||||
int index = 0;
|
||||
int size = 0;
|
||||
};
|
||||
|
||||
PIChar * subjectPtr() const;
|
||||
|
||||
bool has_match = false;
|
||||
bool is_error = false;
|
||||
PIVector<Group> groups;
|
||||
PIRegularExpression * parent = nullptr;
|
||||
PIString * subject = nullptr;
|
||||
PIString subject_own;
|
||||
size_t start_offset = 0;
|
||||
};
|
||||
|
||||
PIString pattern() const { return pat_; }
|
||||
Options options() const { return opt_; }
|
||||
|
||||
void setPattern(const PIString & pattern);
|
||||
void setPattern(const PIString & pattern, Options opt);
|
||||
|
||||
bool isValid() const;
|
||||
bool isNotValid() const { return !isValid(); }
|
||||
PIString errorString() const;
|
||||
int errorPosition() const;
|
||||
|
||||
int captureGroupsCount() const;
|
||||
PIStringList captureGroupNames() const;
|
||||
PIString captureGroupName(int index) const;
|
||||
int captureGroupIndex(const PIString & gname) const;
|
||||
|
||||
Matcher match(const PIString & subject, size_t offset = 0);
|
||||
Matcher match(PIString & subject, size_t offset = 0);
|
||||
Matcher match(PIString && subject, size_t offset = 0);
|
||||
|
||||
Matcher matchIterator(const PIString & subject, size_t offset = 0);
|
||||
Matcher matchIterator(PIString & subject, size_t offset = 0);
|
||||
Matcher matchIterator(PIString && subject, size_t offset = 0);
|
||||
|
||||
static PIRegularExpression fromGlob(const PIString & pattern, Options opt = None);
|
||||
static PIRegularExpression fromPOSIX(const PIString & pattern, Options opt = None);
|
||||
|
||||
private:
|
||||
void convertFrom(const PIString & pattern, uint type, Options opt);
|
||||
|
||||
PRIVATE_DECLARATION(PIP_EXPORT)
|
||||
PIString pat_;
|
||||
Options opt_;
|
||||
};
|
||||
|
||||
#endif // PIREGULAREXPRESSION_H
|
||||
@@ -52,6 +52,7 @@
|
||||
#define PITEXTMODULE_H
|
||||
|
||||
#include "piconstchars.h"
|
||||
#include "piregularexpression.h"
|
||||
#include "pistringlist.h"
|
||||
#include "pitextstream.h"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user