Files
qad/_oldpip/pip_0.4.0_r5/pistring.cpp
Бычков Андрей ba8bc27298 1
git-svn-id: svn://db.shs.com.ru/libs@1 a8b55f48-bf90-11e4-a774-851b48703e85
2015-02-28 21:28:53 +00:00

765 lines
19 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
PIP - Platform Independent Primitives
String
Copyright (C) 2014 Ivan Pelipenko peri4ko@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pistring.h"
/*! \class PIString
* \brief String class
* \details PIP use this class for use string information.
*
* \section PIString_sec0 Synopsis
* This class based on \a PIVector to store information.
* String is a sequence of \a PIChar and can contain multibyte
* symbols. Therefore real memory size of string is symbols count * 4.
* String can be constucted from many types of data and can be converted
* to many types. There are man operators and handly functions to use
* string as you wish.
*
* \section PIString_sec1 To/from data convertions
* Most common constructor is \a PIString(const char * str), where "str"
* is null-terminated string, e.g. \c "string". This is 7 chars with last char = 0.
* Also you can constructs \a PIString from single \a PIChar, \a PIByteArray,
* other \a PIString or sequency of the same characters with custom length.\n \n
* This class has implicit conversions to <tt>const char * </tt> and
* \c std::string. Also there are functions to make same convertions:
* * \a data() - to <tt>const char * </tt>,
* * \a stdString() - to \c std::string,
* * \a toByteArray() - to \a PIByteArray.
*
* \section PIString_sec2 Numeric operations
* You can get symbolic representation of any numeric value with function
* \a setNumber(any integer value, int base = 10, bool * ok = 0). Default
* arguments are set for decimal base system, but you can choose any system
* from 2 to 40. There are the same static functions \a fromNumber(), that
* returns \a PIString. \n
* Also there is function \a setReadableSize() which is set human-readable
* size in bytes, Kb, Mb, Gb or Pb. Static analog is \a readableSize().
*
*/
const char PIString::toBaseN[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^'};
const int PIString::fromBaseN[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, -1,
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
void PIString::appendFromChars(const char * c, int s) {
int sz;
wchar_t wc;
for (int i = 0; i < s; ++i) {
if (/*isascii(c[i])*/c[i] >= 0) {
push_back(PIChar(c[i]));
continue;
}
sz = mbtowc(&wc, &(c[i]), 4);
//cout << sz << endl;
switch (sz) {
case 4:
push_back(PIChar(*(int*)&(c[i])));
i += 3;
continue;
case 3:
push_back(PIChar(*(int*)&(c[i])));
back().ch &= 0xFFFFFF;
i += 2;
continue;
case 2:
push_back(PIChar(*(short * )&(c[i])));
++i;
continue;
default:
push_back(PIChar(c[i]));
break;
}
}
}
PIString & PIString::operator +=(const char * str) {
int l = 0;
while (str[l] != '\0') ++l;
appendFromChars(str, l);
return *this;
}
PIString & PIString::operator +=(const wchar_t * str) {
//cout << "wc" << endl;
int l = 0, sz;
char * c = new char[MB_CUR_MAX];
while (str[l] != 0) ++l;
for (int i = 0; i < l; ++i) {
sz = wctomb(c, str[i]);
switch (sz) {
case 4:
push_back(PIChar(*(int*)c));
continue;
case 3:
push_back(PIChar(*(int*)c));
back().ch &= 0xFFFFFF;
continue;
case 2:
push_back(PIChar(*(short * )c));
continue;
default:
push_back(PIChar(c[0]));
break;
}
}
delete[] c;
return *this;
}
#ifdef HAS_LOCALE
PIString & PIString::operator +=(const wstring & str) {
uint l = str.size();
for (uint i = 0; i < l; ++i) push_back(str[i]);
return *this;
}
#endif
PIString & PIString::operator +=(const PIString & str) {
//uint l = str.size();
*((PIDeque<PIChar>*)this) << *((PIDeque<PIChar>*)&str);
return *this;
}
bool PIString::operator ==(const PIString & str) const {
uint l = str.size();
if (size() != l) return false;
for (uint i = 0; i < l; ++i)
if (str[i] != at(i))
return false;
return true;
}
bool PIString::operator !=(const PIString & str) const {
uint l = str.size();
if (size() != l) return true;
for (uint i = 0; i < l; ++i)
if (str[i] != at(i))
return true;
return false;
}
bool PIString::operator <(const PIString & str) const {
uint l = str.size();
if (size() < l) return true;
if (size() > l) return false;
for (uint i = 0; i < l; ++i) {
if (str[i] == at(i)) continue;
if (str[i] < at(i)) return true;
else return false;
}
return false;
}
bool PIString::operator >(const PIString & str) const {
uint l = str.size();
if (size() < l) return false;
if (size() > l) return true;
for (uint i = 0; i < l; ++i) {
if (str[i] == at(i)) continue;
if (str[i] < at(i)) return false;
else return true;
}
return false;
}
PIString PIString::mid(const int start, const int len) const {
//PIString str;
int s = start, l = len;
if (l == 0) return PIString();
if (s < 0) {
l += s;
s = 0;
}
if (l < 0) {
//for (uint i = s; i < size(); ++i)
// str += at(i);
return PIString(&(at(s)), size() - s);
} else {
if (l > length() - s)
l = length() - s;
//for (int i = s; i < s + l; ++i)
// str += at(i);
return PIString(&(at(s)), l);
}
return PIString();
}
PIString & PIString::cutMid(const int start, const int len) {
int s = start, l = len;
if (l == 0) return *this;
if (s < 0) {
l += s;
s = 0;
}
if (l < 0)
remove(s, size() - s);
else {
if (l > length() - s)
l = length() - s;
remove(s, l);
}
return *this;
}
PIString & PIString::trim() {
int st = 0, fn = 0;
for (int i = 0; i < length(); ++i)
if (at(i) != ' ' && at(i) != '\t' && at(i) != '\n' && at(i) != '\r' && at(i) != char(12))
{st = i; break;}
for (int i = length() - 1; i >= 0; --i)
if (at(i) != ' ' && at(i) != '\t' && at(i) != '\n' && at(i) != '\r' && at(i) != char(12))
{fn = i; break;}
//*this = mid(st, fn - st + 1);
if (fn < size_s() - 1) cutRight(size_s() - fn - 1);
if (st > 0) cutLeft(st);
return *this;
}
PIString PIString::trimmed() const {
int st = 0, fn = 0;
for (int i = 0; i < length(); ++i)
if (at(i) != ' ' && at(i) != '\t' && at(i) != '\n' && at(i) != '\r' && at(i) != char(12))
{st = i; break;}
for (int i = length() - 1; i >= 0; --i)
if (at(i) != ' ' && at(i) != '\t' && at(i) != '\n' && at(i) != '\r' && at(i) != char(12))
{fn = i; break;}
return mid(st, fn - st + 1);
}
PIString & PIString::replace(int from, int count, const PIString & with) {
if (count < length() - from) remove(from, count);
else remove(from, length() - from);
uint c = with.length();
for (uint i = 0; i < c; ++i) insert(from + i, with[i]);
return *this;
}
PIString & PIString::replace(const PIString & what, const PIString & with, bool * ok) {
//piCout << "replace" << what << with;
if (what.isEmpty()) {
if (ok != 0) *ok = false;
return *this;
}
int s = find(what);
if (s >= 0) replace(s, what.length(), with);
if (ok != 0) *ok = (s >= 0);
return *this;
}
PIString & PIString::replaceAll(const PIString & what, const PIString & with) {
if (what.isEmpty() || what == with) return *this;
bool ok = true;
while (ok) replace(what, with, &ok);
return *this;
}
PIString & PIString::insert(int index, const PIString & str) {
//uint c = str.length();
//for (uint i = 0; i < c; ++i) insert(index + i, str[i]);
PIDeque<PIChar>::insert(index, *((const PIDeque<PIChar>*)&str));
return *this;
}
PIStringList PIString::split(const PIString & delim) const {
PIStringList sl;
if (isEmpty() || delim.isEmpty()) return sl;
PIString ts(*this);
int ci = ts.find(delim);
while (ci >= 0) {
sl << ts.left(ci);
ts.cutLeft(ci + delim.length());
ci = ts.find(delim);
}
if (ts.length() > 0) sl << ts;
return sl;
}
int PIString::find(const char str, const int start) const {
for (int i = start; i < length(); ++i)
if (at(i) == str)
return i;
return -1;
}
int PIString::find(const PIString str, const int start) const {
int l = str.length();
for (int i = start; i < length() - l + 1; ++i)
if (mid(i, l) == str)
return i;
return -1;
}
int PIString::findLast(const char str, const int start) const {
for (int i = length() - 1; i >= start; --i)
if (at(i) == str)
return i;
return -1;
}
int PIString::findLast(const PIString str, const int start) const {
int l = str.length();
for (int i = length() - l; i >= start; --i)
if (mid(i, l) == str)
return i;
return -1;
}
int PIString::findWord(const PIString & word, const int start) const {
int f = start - 1, tl = length(), wl = word.length();
while ((f = find(word, f + 1)) >= 0) {
bool ok = true;
PIChar c;
if (f > 0) {c = (*this)[f - 1]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r')) {ok = false; continue;}}
if (f + wl < tl) {c = (*this)[f + wl]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r')) {ok = false; continue;}}
if (ok) return f;
}
return -1;
}
int PIString::findCWord(const PIString & word, const int start) const {
int f = start - 1, tl = length(), wl = word.length();
while ((f = find(word, f + 1)) >= 0) {
bool ok = true;
PIChar c;
if (f > 0) {c = (*this)[f - 1]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r' || (c != '_' && !c.isAlpha() && !c.isDigit()))) {ok = false; continue;}}
if (f + wl < tl) {c = (*this)[f + wl]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r' || (c != '_' && !c.isAlpha() && !c.isDigit()))) {ok = false; continue;}}
if (ok) return f;
}
return -1;
}
bool PIString::startsWith(const PIString & str) const {
if (size() < str.size()) return false;
return str == left(str.size());
}
bool PIString::endsWith(const PIString & str) const {
if (size() < str.size()) return false;
return str == right(str.size());
}
PIString PIString::takeSymbol() {
PIString ret;
int sz = size_s(), ss = -1;
for (int i = 0; i < sz; ++i) {
PIChar c = at(i);
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
ss = i;
break;
}
if (ss < 0) return ret;
ret = mid(ss, 1);
cutLeft(ss + 1);
return ret;
}
PIString PIString::takeWord() {
int sz = size_s(), ws = -1, we = -1;
for (int i = 0; i < sz; ++i) {
PIChar c = at(i);
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
if (we < 0 && ws >= 0) {
we = i;
break;
}
} else {
if (ws < 0) ws = i;
if (we >= 0) break;
}
}
PIString ret = mid(ws, we - ws);
cutLeft(we < 0 ? sz : we);
return ret;
}
PIString PIString::takeCWord() {
PIString ret;
int sz = size_s(), ws = -1, we = -1;
for (int i = 0; i < sz; ++i) {
PIChar c = at(i);
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
if (we < 0 && ws >= 0) {
we = i;
break;
}
} else {
if (ws < 0) {
if (c.isAlpha() || c == '_')
ws = i;
else
return ret;
} else {
if (!c.isAlpha() && !c.isDigit() && c != '_') {
we = i;
break;
}
}
if (we >= 0) break;
}
}
ret = mid(ws, we - ws);
cutLeft(we < 0 ? sz : we);
return ret;
}
PIString PIString::takeLine() {
int sz = size_s(), le = -1;
for (int i = 0; i < sz; ++i) {
PIChar c = at(i);
if (c == '\n') {
le = i;
break;
}
}
PIString ret = left(le);
if (!ret.isEmpty())
if (ret.back() == '\r')
ret.cutRight(1);
cutLeft(le < 0 ? sz : le + 1);
return ret;
}
PIString PIString::takeNumber() {
PIString ret;
int sz = size_s(), ls = -1, le = -1, phase = 0;
for (int i = 0; i < sz; ++i) {
if (phase > 7) break;
PIChar c = at(i);
//piCout << "char " << c << "phase" << phase;
switch (phase) {
case 0: // trim
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
phase = 7;
case 7: // sign
if (c == '-' || c == '+') {ls = i; phase = 1; break;}
case 1: // search start
if (c >= '0' && c <= '9') {le = i; if (ls < 0) ls = i; phase = 2; break;}
if (c == '.') {le = i; if (ls < 0) ls = i; phase = 3; break;}
phase = 9;
break;
case 2: // integer
if (c == '.') {le = i; phase = 3; break;}
if (c == 'e' || c == 'E') {le = i; phase = 4; break;}
if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == 'x') {le = i; break;}
phase = 6;
break;
case 3: // point
if (c == 'e' || c == 'E') {le = i; phase = 4; break;}
if (c >= '0' && c <= '9') {le = i; break;}
phase = 6;
break;
case 4: // exp
if ((c >= '0' && c <= '9') || c == '-' || c == '+') {le = i; phase = 5; break;}
phase = 6;
break;
case 5: // power
if (c >= '0' && c <= '9') {le = i; break;}
phase = 6;
break;
case 6: // suffix
if (c == 'f' || c == 's' || c == 'u' || c == 'l' || c == 'L') {le = i; break;}
phase = 9;
break;
}
if (phase == 6) {
if (c == 'f' || c == 's' || c == 'u' || c == 'l' || c == 'L') le = i;
else phase = 9;
}
}
//piCout << ls << le;
if (le < ls) return ret;
ret = mid(ls, le - ls + 1);
cutLeft(le + 1);
return ret;
}
PIString PIString::takeRange(const PIChar & start, const PIChar & end, const PIChar & shield) {
PIString ret;
bool trim_ = (start != ' ' && start != '\t' && start != '\n' && start != '\r'), eq = (start == end);
int sz = size_s(), ls = -1, le = -1, cnt = 0;
for (int i = 0; i < sz; ++i) {
PIChar c = at(i);
if (c == shield) {++i; continue;}
if (trim_) {
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
trim_ = false;
}
if (eq) {
if (c == start) {
if (cnt == 0) ls = i;
else {le = i; cnt = 0; break;}
cnt++;
}
} else {
if (c == start) {
if (cnt == 0) ls = i;
cnt++;
}
if (c == end) {
cnt--;
if (cnt == 0) le = i;
}
}
if (cnt <= 0) break;
}
//piCout << ls << le << cnt;
if (le < ls || ls < 0 || le < 0 || cnt != 0) return ret;
ret = mid(ls + 1, le - ls - 1);
cutLeft(le + 1);
return ret;
}
PIString PIString::toUpperCase() const {
PIString str(*this);
int l = str.size();
for (int i = 0; i < l; ++i) str[i] = str[i].toUpper();
return str;
}
PIString PIString::toLowerCase() const {
PIString str(*this);
int l = str.size();
for (int i = 0; i < l; ++i) str[i] = str[i].toLower();
return str;
}
int PIString::lengthAscii() const {
int j = 0;
for (int i = 0; i < size_s(); ++i, ++j)
if (!at(i).isAscii()) ++j;
return j;
}
const char * PIString::data() const {
data_.clear();
uint wc;
uchar tc;
//printf("PIString::data %d\n", size_s());
for (int i = 0, j = 0; i < size_s(); ++i) {
wc = uint(at(i).toInt());
//printf("__%d_%d\n", i, wc);
while (tc = wc & 0xFF, tc) {
data_.push_back(uchar(tc)); ++j;
wc >>= 8;
//printf("____%d\n", wc);
}
/*if (at(i).isAscii())
data_.push_back(uchar(at(i).toAscii()));
else {
data_.push_back((at(i).toCharPtr()[0])); ++j;
data_.push_back((at(i).toCharPtr()[1]));
}*/
}
data_.push_back(uchar('\0'));
return (const char * )data_.data();
}
string PIString::convertToStd() const {
string s;
uint wc;
uchar tc;
if (size() > 0) {
for (int i = 0; i < length(); ++i) {
wc = uint(at(i).toInt());
while (tc = wc & 0xFF, tc) {
s.push_back(char(tc));
wc >>= 8;
}
/*if (at(i).isAscii())
s.push_back(at(i).toAscii());
else {
s.push_back(at(i).toCharPtr()[0]);
s.push_back(at(i).toCharPtr()[1]);
}*/
}
}
return s;
}
char PIString::toChar() const {
PIString s(toNativeDecimalPoints());
char v;
sscanf(s.data(), "%c", &v);
return v;
}
/*
short PIString::toShort() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
short v;
if (s.left(2) == "0x") {sscanf(s.data(), "%hx", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%ho", &v); return v;}
sscanf(s.data(), "%hd", &v);
return v;
}
int PIString::toInt() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
int v;
if (s.left(2) == "0x") {sscanf(s.data(), "%x", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%o", &v); return v;}
sscanf(s.data(), "%d", &v);
return v;
}
long PIString::toLong() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
long v;
if (s.left(2) == "0x") {sscanf(s.data(), "%lx", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%lo", &v); return v;}
sscanf(s.data(), "%ld", &v);
return v;
}
llong PIString::toLLong() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
llong v;
if (s.left(2) == "0x") {sscanf(s.data(), "%llx", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%llo", &v); return v;}
sscanf(s.data(), "%lld", &v);
return v;
}
*/
PIString & PIString::setReadableSize(llong bytes) {
clear();
if (bytes < 1024) {*this += (PIString::fromNumber(bytes) + " B"); return *this;}
double fres = bytes / 1024.;
llong res = bytes / 1024;
fres -= res;
if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " kB"); return *this;}
fres = res / 1024.;
res /= 1024;
fres -= res;
if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " MB"); return *this;}
fres = res / 1024.;
res /= 1024;
fres -= res;
if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " GB"); return *this;}
fres = res / 1024.;
res /= 1024;
fres -= res;
if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " TB"); return *this;}
fres = res / 1024.;
res /= 1024;
fres -= res;
*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " PB");
return *this;
}
inline char chrUpr(char c) {
if (c >= 'a' && c <= 'z') return c + 'A' - 'a';
//if (c >= 'а' && c <= 'я') return c + 'А' - 'а';
return c;
}
inline char chrLwr(char c) {
if (c >= 'A' && c <= 'Z') return c + 'a' - 'A';
//if (c >= 'А' && c <= 'Я') return c + 'а' - 'А';
return c;
}
PIStringList& PIStringList::removeDuplicates() {
PIStringList l;
PIString s;
bool ae;
for (int i = 0; i < size_s(); ++i) {
ae = false;
s = at(i);
for (int j = 0; j < l.size_s(); ++j) {
if (s != l[j]) continue;
ae = true; break;
}
if (!ae) {
l << s;
continue;
}
remove(i);
--i;
}
return *this;
}