/*
PIP - Platform Independent Primitives
String
Copyright (C) 2014 Ivan Pelipenko peri4ko@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pistring.h"
/*! \class PIString
* \brief String class
* \details PIP uses this class to store and process string data.
*
* \section PIString_sec0 Synopsis
* This class is based on \a PIDeque to store information.
* A string is a sequence of \a PIChar and can contain multibyte
* symbols. Therefore the real memory size of a string is symbols count * 4.
* A string can be constructed from many types of data and can be converted
* to many types. There are many operators and handy functions to use
* a string as you wish.
*
* \section PIString_sec1 To/from data conversions
* The most common constructor is \a PIString(const char * str), where "str"
* is a null-terminated string, e.g. \c "string". This is 7 chars with last char = 0.
* You can also construct a \a PIString from a single \a PIChar, a \a PIByteArray,
* another \a PIString or a sequence of the same character with custom length.\n \n
* This class has implicit conversions to const char * and
* \c std::string. There are also functions to make the same conversions:
* * \a data() - to const char * ,
* * \a stdString() - to \c std::string,
* * \a toByteArray() - to \a PIByteArray.
*
* \section PIString_sec2 Numeric operations
* You can get the symbolic representation of any numeric value with the function
* \a setNumber(any integer value, int base = 10, bool * ok = 0). The default
* arguments are set for the decimal base system, but you can choose any base
* from 2 to 40. There are matching static functions \a fromNumber() that
* return a \a PIString. \n
* There is also the function \a setReadableSize(), which sets a human-readable
* size in B, kB, MB, GB, TB or PB. Its static analog is \a readableSize().
*
*/
//! Digit characters indexed by digit value: entries 0..9 are '0'-'9',
//! entries 10..35 are 'A'-'Z' and entries 36..39 are '[', '\\', ']' and '^'.
//! The table holds 40 entries and is not null-terminated.
//! NOTE(review): presumably consumed by the number-to-string conversions — confirm in the header.
const char PIString::toBaseN[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^'};
//! Reverse digit lookup with 256 entries (16 rows of 16), indexed by byte value.
//! An entry holds the digit value of that character or -1 when the character is not a digit:
//! '0'-'9' map to 0..9, 'A'-'Z' and 'a'-'z' both map to 10..35,
//! and '[', '\\', ']', '^' map to 36..39 (there is no lowercase equivalent for 36..39).
const int PIString::fromBaseN[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, -1,
-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
void PIString::appendFromChars(const char * c, int s) {
int sz;
wchar_t wc;
for (int i = 0; i < s; ++i) {
if (/*isascii(c[i])*/c[i] >= 0) {
push_back(PIChar(c[i]));
continue;
}
sz = mbtowc(&wc, &(c[i]), 4);
//cout << sz << endl;
switch (sz) {
case 4:
push_back(PIChar(*(int*)&(c[i])));
i += 3;
continue;
case 3:
push_back(PIChar(*(int*)&(c[i])));
back().ch &= 0xFFFFFF;
i += 2;
continue;
case 2:
push_back(PIChar(*(short * )&(c[i])));
++i;
continue;
default:
push_back(PIChar(c[i]));
break;
}
}
}
/*! \brief Append a null-terminated C string.
 * \details The length is measured up to the terminating zero and the bytes are
 * handed to \a appendFromChars(). A null pointer is treated as an empty string.
 * \return reference to this string */
PIString & PIString::operator +=(const char * str) {
	if (str == 0) return *this; // nothing to append; avoids dereferencing a null pointer
	int len = 0;
	while (str[len] != '\0') ++len;
	appendFromChars(str, len);
	return *this;
}
/*! \brief Append a zero-terminated wide string.
 * \details Every wide character is converted with \c wctomb() (current C locale).
 * A result of 2, 3 or 4 bytes is packed into one \a PIChar with the first byte in
 * the lowest 8 bits; any other positive length appends only the first byte.
 * Characters that \c wctomb() cannot convert are skipped.
 * A null pointer is treated as an empty string.
 * \return reference to this string */
PIString & PIString::operator +=(const wchar_t * str) {
	if (str == 0) return *this;
	// wctomb() writes at most MB_CUR_MAX bytes; the string object owns the
	// scratch buffer, so it is released even if push_back() throws.
	const size_t cap = MB_CUR_MAX;
	string buf(cap, '\0');
	char * c = &buf[0];
	for (int i = 0; str[i] != 0; ++i) {
		const int sz = wctomb(c, str[i]);
		switch (sz) {
		case 4:
		case 3: {
			// Read exactly sz bytes; loading an int through a cast could run past the buffer.
			uint packed = 0;
			for (int k = 0; k < sz; ++k)
				packed |= uint(uchar(c[k])) << (8 * k);
			push_back(PIChar(int(packed)));
			break;
		}
		case 2: {
			// Kept as a short so the same PIChar constructor is selected as before.
			const short packed = short(int(uchar(c[0])) | (int(uchar(c[1])) << 8));
			push_back(PIChar(packed));
			break;
		}
		default:
			// sz <= 0 means conversion failed and the buffer content is meaningless.
			if (sz > 0) push_back(PIChar(c[0]));
			break;
		}
	}
	return *this;
}
#ifdef HAS_LOCALE
/*! \brief Append every element of a wide std string.
 * \details Each \c wchar_t is passed to \c push_back() unchanged.
 * \return reference to this string */
PIString & PIString::operator +=(const wstring & str) {
	const uint count = str.size();
	uint pos = 0;
	while (pos < count) {
		push_back(str[pos]);
		++pos;
	}
	return *this;
}
#endif
/*! \brief Append another string.
 * \return reference to this string */
PIString & PIString::operator +=(const PIString & str) {
//uint l = str.size();
// Both operands are viewed through PIDeque pointers so the container-level
// operator<< performs the append (the cast on "str" also drops its const).
// NOTE(review): presumably PIDeque is the container base of PIString — confirm in the header.
*((PIDeque*)this) << *((PIDeque*)&str);
return *this;
}
/*! \brief Equality test.
 * \return \c true when both strings have the same size and no position differs */
bool PIString::operator ==(const PIString & str) const {
	const uint count = str.size();
	if (count != size()) return false;
	uint idx = 0;
	// advance while the characters at idx do not differ
	while (idx < count && !(str[idx] != at(idx))) ++idx;
	return idx == count;
}
/*! \brief Inequality test.
 * \return \c true when the sizes differ or at least one position differs */
bool PIString::operator !=(const PIString & str) const {
	return !(*this == str);
}
/*! \brief "Less than" ordering: shorter strings first, then by first differing character.
 * \details Strings of different size are ordered by size alone. Strings of equal
 * size are ordered by the first position whose characters differ; this string is
 * less when its character there is the smaller one. The previous implementation
 * had that character test inverted, so e.g. "abc" < "abd" was \c false.
 * \return \c true when this string orders before \a str */
bool PIString::operator <(const PIString & str) const {
	const uint l = str.size();
	if (size() < l) return true;
	if (size() > l) return false;
	for (uint i = 0; i < l; ++i) {
		if (str[i] == at(i)) continue;
		// characters differ: this < str exactly when str's character is not the smaller one
		return !(str[i] < at(i));
	}
	return false;
}
/*! \brief "Greater than" ordering, the mirror of operator <.
 * \details Longer strings order after shorter ones. For equal sizes the first
 * differing position decides; this string is greater when its character there
 * is the larger one (the previous implementation had this test inverted).
 * \return \c true when this string orders after \a str */
bool PIString::operator >(const PIString & str) const {
	const uint l = str.size();
	if (size() < l) return false;
	if (size() > l) return true;
	for (uint i = 0; i < l; ++i) {
		if (str[i] == at(i)) continue;
		// characters differ: this > str exactly when str's character is the smaller one
		return str[i] < at(i);
	}
	return false;
}
/*! \brief Return a copy of a part of the string.
 * \param start index of the first character; a negative value is clipped to 0
 *        and, for a positive \a len, shortens the window by the clipped amount
 * \param len number of characters; a negative value means "up to the end",
 *        0 yields an empty string
 * \return the selected part, or an empty string when the window is empty or
 *         starts at or beyond the end of the string */
PIString PIString::mid(const int start, const int len) const {
	if (len == 0) return PIString();
	const int total = length();
	int s = start, l = len;
	if (s < 0) {
		if (l > 0) {
			l += s;
			// the whole window lies before the string: previously this fell
			// through to the "up to the end" branch and returned everything
			if (l <= 0) return PIString();
		}
		s = 0;
	}
	// guards at(s) against an out-of-range index (also covers the empty string)
	if (s >= total) return PIString();
	if (l < 0 || l > total - s) l = total - s;
	return PIString(&(at(s)), l);
}
/*! \brief Remove a part of the string in place.
 * \param start index of the first character to remove; a negative value is
 *        clipped to 0 and, for a positive \a len, shortens the window by the clipped amount
 * \param len number of characters to remove; a negative value means "up to the
 *        end", 0 removes nothing
 * \return reference to this string; it is left untouched when the window is
 *         empty or starts at or beyond the end */
PIString & PIString::cutMid(const int start, const int len) {
	if (len == 0) return *this;
	const int total = length();
	int s = start, l = len;
	if (s < 0) {
		if (l > 0) {
			l += s;
			// window lies entirely before the string: nothing to remove
			// (previously this removed everything from index 0)
			if (l <= 0) return *this;
		}
		s = 0;
	}
	// never hand remove() a start beyond the end or a negative count
	if (s >= total) return *this;
	if (l < 0 || l > total - s) l = total - s;
	remove(s, l);
	return *this;
}
/*! \brief Strip leading and trailing whitespace in place.
 * \details Whitespace here is space, tab, line feed, carriage return and form
 * feed (code 12). A string made only of whitespace becomes empty; previously
 * its first character survived.
 * \return reference to this string */
PIString & PIString::trim() {
	const int total = length();
	int st = 0;
	while (st < total && (at(st) == ' ' || at(st) == '\t' || at(st) == '\n' || at(st) == '\r' || at(st) == char(12)))
		++st;
	if (st == total) {
		// empty or whitespace only
		clear();
		return *this;
	}
	int fn = total - 1;
	// at(st) is known to be non-whitespace, so the scan stops at st at the latest
	while (fn > st && (at(fn) == ' ' || at(fn) == '\t' || at(fn) == '\n' || at(fn) == '\r' || at(fn) == char(12)))
		--fn;
	// cut the tail first so the index st stays valid for the head cut
	if (fn < total - 1) cutRight(total - fn - 1);
	if (st > 0) cutLeft(st);
	return *this;
}
/*! \brief Return a copy without leading and trailing whitespace.
 * \details Whitespace here is space, tab, line feed, carriage return and form
 * feed (code 12). For an empty or whitespace-only string the result is empty;
 * previously the first character was returned and an empty string was indexed.
 * \return the trimmed copy */
PIString PIString::trimmed() const {
	const int total = length();
	int st = 0;
	while (st < total && (at(st) == ' ' || at(st) == '\t' || at(st) == '\n' || at(st) == '\r' || at(st) == char(12)))
		++st;
	if (st == total) return PIString();
	int fn = total - 1;
	// at(st) is known to be non-whitespace, so the scan stops at st at the latest
	while (fn > st && (at(fn) == ' ' || at(fn) == '\t' || at(fn) == '\n' || at(fn) == '\r' || at(fn) == char(12)))
		--fn;
	return mid(st, fn - st + 1);
}
/*! \brief Replace \a count characters starting at \a from with \a with.
 * \details The removed span is limited to the characters available after
 * \a from; the replacement is then inserted character by character.
 * \return reference to this string */
PIString & PIString::replace(int from, int count, const PIString & with) {
	const int tail = length() - from;
	remove(from, count < tail ? count : tail);
	const uint total = with.length();
	uint k = 0;
	while (k < total) {
		insert(from + k, with[k]);
		++k;
	}
	return *this;
}
/*! \brief Replace the first occurrence of \a what with \a with.
 * \param ok optional; receives \c true when a replacement was made, \c false
 *        otherwise (including an empty \a what)
 * \return reference to this string */
PIString & PIString::replace(const PIString & what, const PIString & with, bool * ok) {
	bool replaced = false;
	if (!what.isEmpty()) {
		const int pos = find(what);
		if (pos >= 0) {
			replace(pos, what.length(), with);
			replaced = true;
		}
	}
	if (ok != 0) *ok = replaced;
	return *this;
}
/*! \brief Replace occurrences of \a what with \a with.
 * \details Nothing happens when \a what is empty or equal to \a with.
 * When \a with does not contain \a what, the string is searched from the
 * beginning after every replacement, so occurrences formed by a replacement are
 * replaced too (e.g. runs of spaces collapse). When \a with contains \a what,
 * that strategy would never finish, so a single left-to-right pass is made and
 * the inserted text is not searched again.
 * \return reference to this string */
PIString & PIString::replaceAll(const PIString & what, const PIString & with) {
	if (what.isEmpty() || what == with) return *this;
	if (with.find(what) < 0) {
		bool ok = true;
		while (ok) replace(what, with, &ok);
		return *this;
	}
	// The replacement re-introduces the pattern: continue the search after the inserted text.
	const int wl = what.length(), rl = with.length();
	int pos = find(what);
	while (pos >= 0) {
		replace(pos, wl, with);
		pos = find(what, pos + rl);
	}
	return *this;
}
/*! \brief Insert string \a str at position \a index.
 * \return reference to this string */
PIString & PIString::insert(int index, const PIString & str) {
//uint c = str.length();
//for (uint i = 0; i < c; ++i) insert(index + i, str[i]);
// The whole string is handed to the container-level insert in one call
// (replaces the per-character loop kept above for reference).
PIDeque::insert(index, *((const PIDeque*)&str));
return *this;
}
/*! \brief Split the string at every occurrence of \a delim.
 * \details Returns an empty list when this string or \a delim is empty.
 * The text before each delimiter is added to the list (possibly as an empty
 * item); the remainder after the last delimiter is added only when not empty.
 * \return list of parts */
PIStringList PIString::split(const PIString & delim) const {
	PIStringList parts;
	if (isEmpty() || delim.isEmpty()) return parts;
	PIString rest(*this);
	for (int cut = rest.find(delim); cut >= 0; cut = rest.find(delim)) {
		parts << rest.left(cut);
		rest.cutLeft(cut + delim.length());
	}
	if (rest.length() > 0) parts << rest;
	return parts;
}
/*! \brief Find the first occurrence of character \a str at or after \a start.
 * \param start first index to examine; negative values are treated as 0
 * \return index of the match or -1 */
int PIString::find(const char str, const int start) const {
	// clamp so that at() is never called with a negative index
	for (int i = (start < 0 ? 0 : start); i < length(); ++i)
		if (at(i) == str)
			return i;
	return -1;
}
int PIString::find(const PIString str, const int start) const {
int l = str.length();
for (int i = start; i < length() - l + 1; ++i)
if (mid(i, l) == str)
return i;
return -1;
}
/*! \brief Find the last occurrence of character \a str at or after \a start.
 * \param start lowest index to examine; negative values are treated as 0
 * \return index of the match or -1 */
int PIString::findLast(const char str, const int start) const {
	// clamp so that the backward scan never indexes below 0
	const int first = (start < 0 ? 0 : start);
	for (int i = length() - 1; i >= first; --i)
		if (at(i) == str)
			return i;
	return -1;
}
int PIString::findLast(const PIString str, const int start) const {
int l = str.length();
for (int i = length() - l; i >= start; --i)
if (mid(i, l) == str)
return i;
return -1;
}
int PIString::findWord(const PIString & word, const int start) const {
int f = start - 1, tl = length(), wl = word.length();
while ((f = find(word, f + 1)) >= 0) {
bool ok = true;
PIChar c;
if (f > 0) {c = (*this)[f - 1]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r')) {ok = false; continue;}}
if (f + wl < tl) {c = (*this)[f + wl]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r')) {ok = false; continue;}}
if (ok) return f;
}
return -1;
}
int PIString::findCWord(const PIString & word, const int start) const {
int f = start - 1, tl = length(), wl = word.length();
while ((f = find(word, f + 1)) >= 0) {
bool ok = true;
PIChar c;
if (f > 0) {c = (*this)[f - 1]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r' || (c != '_' && !c.isAlpha() && !c.isDigit()))) {ok = false; continue;}}
if (f + wl < tl) {c = (*this)[f + wl]; if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r' || (c != '_' && !c.isAlpha() && !c.isDigit()))) {ok = false; continue;}}
if (ok) return f;
}
return -1;
}
bool PIString::startsWith(const PIString & str) const {
if (size() < str.size()) return false;
return str == left(str.size());
}
bool PIString::endsWith(const PIString & str) const {
if (size() < str.size()) return false;
return str == right(str.size());
}
/*! \brief Remove and return the first non-whitespace character.
 * \details Leading spaces, tabs, line feeds and carriage returns are skipped and
 * removed together with the character. When the string holds no other
 * character, it is left unchanged and an empty string is returned.
 * \return one-character string or an empty string */
PIString PIString::takeSymbol() {
	const int total = size_s();
	int pos = 0;
	while (pos < total) {
		PIChar c = at(pos);
		if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r')) break;
		++pos;
	}
	if (pos >= total) return PIString();
	PIString symbol = mid(pos, 1);
	cutLeft(pos + 1);
	return symbol;
}
/*! \brief Remove and return the first whitespace-delimited word.
 * \details Leading spaces, tabs, line feeds and carriage returns are skipped.
 * The word runs up to the next such character, which stays in the string. When
 * no whitespace follows the word (or no word exists), the whole string is consumed.
 * \return the word; empty when the string holds no word */
PIString PIString::takeWord() {
	const int total = size_s();
	int first = -1, stop = -1;
	for (int i = 0; i < total; ++i) {
		PIChar c = at(i);
		const bool blank = (c == ' ' || c == '\t' || c == '\n' || c == '\r');
		if (!blank) {
			if (first < 0) first = i;
			continue;
		}
		if (first >= 0) {
			// first blank after the word marks its end
			stop = i;
			break;
		}
	}
	PIString word = mid(first, stop - first);
	cutLeft(stop < 0 ? total : stop);
	return word;
}
/*! \brief Remove and return a leading C-style identifier.
 * \details Leading spaces, tabs, line feeds and carriage returns are skipped.
 * The first other character must be alphabetic or '_'; otherwise an empty
 * string is returned and this string is left unchanged. The identifier then
 * runs until the first character that is whitespace or is not alphabetic, a
 * digit or '_'; that character stays in the string. When the identifier reaches
 * the end (or the string holds only whitespace), the whole string is consumed.
 * \return the identifier or an empty string */
PIString PIString::takeCWord() {
	PIString word;
	const int total = size_s();
	int first = -1, stop = -1;
	for (int i = 0; i < total; ++i) {
		PIChar c = at(i);
		const bool blank = (c == ' ' || c == '\t' || c == '\n' || c == '\r');
		if (first < 0) {
			if (blank) continue;
			// the identifier must start with a letter or underscore
			if (!(c.isAlpha() || c == '_')) return word;
			first = i;
			continue;
		}
		if (blank || (!c.isAlpha() && !c.isDigit() && c != '_')) {
			stop = i;
			break;
		}
	}
	word = mid(first, stop - first);
	cutLeft(stop < 0 ? total : stop);
	return word;
}
/*! \brief Remove and return the text up to the first line feed.
 * \details The result is \c left(index of the first '\\n'); a trailing '\\r' is
 * dropped from it. The line feed itself is removed from this string as well.
 * When no line feed exists, \c left(-1) is returned and the whole string is consumed.
 * NOTE(review): what \c left() yields for -1 is defined elsewhere — confirm.
 * \return the extracted line */
PIString PIString::takeLine() {
	const int total = size_s();
	int eol = -1;
	for (int pos = 0; pos < total && eol < 0; ++pos)
		if (at(pos) == '\n') eol = pos;
	PIString line = left(eol);
	if (!line.isEmpty() && line.back() == '\r') line.cutRight(1);
	cutLeft(eol < 0 ? total : eol + 1);
	return line;
}
/*! \brief Remove and return a leading numeric literal.
 * \details A small state machine scans the string; \c ls and \c le are the
 * first and last index of the literal. Phases: 0 skips leading whitespace,
 * 7 accepts an optional sign, 1 expects a digit or '.', 2 is the integer part
 * (also accepts hex digits and 'x'), 3 is the fractional part, 4 follows an
 * 'e'/'E', 5 is the exponent digits, 6 accepts suffix letters (f, s, u, l, L),
 * 9 stops the scan. When no literal is found, an empty string is returned and
 * this string is left unchanged; otherwise everything up to and including the
 * literal is removed.
 * \return the literal text or an empty string */
PIString PIString::takeNumber() {
	PIString ret;
	int sz = size_s(), ls = -1, le = -1, phase = 0;
	for (int i = 0; i < sz; ++i) {
		if (phase > 7) break;
		PIChar c = at(i);
		//piCout << "char " << c << "phase" << phase;
		switch (phase) {
		case 0: // trim
			if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
				continue;
			phase = 7;
			// fall through: the first non-blank character may be a sign
		case 7: // sign
			if (c == '-' || c == '+') {ls = i; phase = 1; break;}
			// fall through: no sign, the character must start the number
		case 1: // search start
			if (c >= '0' && c <= '9') {le = i; if (ls < 0) ls = i; phase = 2; break;}
			if (c == '.') {le = i; if (ls < 0) ls = i; phase = 3; break;}
			phase = 9;
			break;
		case 2: // integer
			if (c == '.') {le = i; phase = 3; break;}
			if (c == 'e' || c == 'E') {le = i; phase = 4; break;}
			if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == 'x') {le = i; break;}
			phase = 6;
			break;
		case 3: // point
			if (c == 'e' || c == 'E') {le = i; phase = 4; break;}
			if (c >= '0' && c <= '9') {le = i; break;}
			phase = 6;
			break;
		case 4: // exp
			if ((c >= '0' && c <= '9') || c == '-' || c == '+') {le = i; phase = 5; break;}
			phase = 6;
			break;
		case 5: // power
			if (c >= '0' && c <= '9') {le = i; break;}
			phase = 6;
			break;
		case 6: // suffix
			if (c == 'f' || c == 's' || c == 'u' || c == 'l' || c == 'L') {le = i; break;}
			phase = 9;
			break;
		}
		// a character that ended the numeric part may itself be the first suffix letter
		if (phase == 6) {
			if (c == 'f' || c == 's' || c == 'u' || c == 'l' || c == 'L') le = i;
			else phase = 9;
		}
	}
	//piCout << ls << le;
	// ls < 0: nothing number-like was seen; return before mid(-1, 1) can index
	// the string (it reached at(0) even when the string was empty)
	if (ls < 0 || le < ls) return ret;
	ret = mid(ls, le - ls + 1);
	cutLeft(le + 1);
	return ret;
}
/*! \brief Remove and return the text enclosed between \a start and \a end.
 * \details Leading whitespace is skipped unless \a start itself is a whitespace
 * character. When \a start and \a end differ, nested pairs are counted and the
 * range closes at the matching \a end; when they are equal, the range closes at
 * the second occurrence. A \a shield character and the character after it are
 * skipped by the scan. On success everything up to and including the closing
 * character is removed from this string and the text between the two markers is
 * returned. Otherwise an empty string is returned and this string is unchanged.
 * \param start opening character
 * \param end closing character
 * \param shield escape character
 * \return the enclosed text or an empty string */
PIString PIString::takeRange(const PIChar & start, const PIChar & end, const PIChar & shield) {
PIString ret;
// trim_: still skipping leading whitespace; eq: opening and closing marker are the same character
bool trim_ = (start != ' ' && start != '\t' && start != '\n' && start != '\r'), eq = (start == end);
// ls / le: index of the opening / closing marker; cnt: current nesting depth
int sz = size_s(), ls = -1, le = -1, cnt = 0;
for (int i = 0; i < sz; ++i) {
PIChar c = at(i);
// skip the shield and the character it protects
if (c == shield) {++i; continue;}
if (trim_) {
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
trim_ = false;
}
if (eq) {
if (c == start) {
if (cnt == 0) ls = i;
else {le = i; cnt = 0; break;}
cnt++;
}
} else {
if (c == start) {
if (cnt == 0) ls = i;
cnt++;
}
if (c == end) {
cnt--;
if (cnt == 0) le = i;
}
}
// depth back at (or below) zero: the range is closed, or the first
// significant character was not the opening marker
if (cnt <= 0) break;
}
//piCout << ls << le << cnt;
// reject an unopened, unclosed or unbalanced range
if (le < ls || ls < 0 || le < 0 || cnt != 0) return ret;
ret = mid(ls + 1, le - ls - 1);
cutLeft(le + 1);
return ret;
}
/*! \brief Return a copy with every character passed through \c PIChar::toUpper().
 * \return the converted copy; this string is not modified */
PIString PIString::toUpperCase() const {
	PIString upper(*this);
	const int count = upper.size();
	for (int pos = count; pos-- > 0; )
		upper[pos] = upper[pos].toUpper();
	return upper;
}
/*! \brief Return a copy with every character passed through \c PIChar::toLower().
 * \return the converted copy; this string is not modified */
PIString PIString::toLowerCase() const {
	PIString lower(*this);
	const int count = lower.size();
	for (int pos = count; pos-- > 0; )
		lower[pos] = lower[pos].toLower();
	return lower;
}
/*! \brief Length estimate in which every non-ASCII character counts twice.
 * \return number of characters plus the number of characters for which
 *         \c isAscii() is \c false */
int PIString::lengthAscii() const {
	int extra = 0;
	int pos = 0;
	for (; pos < size_s(); ++pos)
		if (!at(pos).isAscii()) ++extra;
	return pos + extra;
}
const char * PIString::data() const {
data_.clear();
uint wc;
uchar tc;
//printf("PIString::data %d\n", size_s());
for (int i = 0, j = 0; i < size_s(); ++i) {
wc = uint(at(i).toInt());
//printf("__%d_%d\n", i, wc);
while (tc = wc & 0xFF, tc) {
data_.push_back(uchar(tc)); ++j;
wc >>= 8;
//printf("____%d\n", wc);
}
/*if (at(i).isAscii())
data_.push_back(uchar(at(i).toAscii()));
else {
data_.push_back((at(i).toCharPtr()[0])); ++j;
data_.push_back((at(i).toCharPtr()[1]));
}*/
}
data_.push_back(uchar('\0'));
return (const char * )data_.data();
}
/*! \brief Convert to a byte-oriented std string.
 * \details For each character the integer code is emitted byte by byte, lowest
 * byte first, stopping at the first zero byte.
 * \return the resulting string; empty for an empty PIString */
string PIString::convertToStd() const {
	string out;
	for (int pos = 0; pos < length(); ++pos) {
		// peel off the low byte until it becomes zero
		for (uint rest = uint(at(pos).toInt()); (rest & 0xFF) != 0; rest >>= 8)
			out.push_back(char(uchar(rest & 0xFF)));
	}
	return out;
}
/*! \brief Return the first byte of the string as a \c char.
 * \details The string is passed through \a toNativeDecimalPoints() and its byte
 * form is read with \c sscanf("%c").
 * \return the character read, or 0 when nothing could be read (e.g. empty string) */
char PIString::toChar() const {
	PIString s(toNativeDecimalPoints());
	char v = 0; // sscanf leaves v untouched on failure, so it must start defined
	sscanf(s.data(), "%c", &v);
	return v;
}
/*
short PIString::toShort() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
short v;
if (s.left(2) == "0x") {sscanf(s.data(), "%hx", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%ho", &v); return v;}
sscanf(s.data(), "%hd", &v);
return v;
}
int PIString::toInt() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
int v;
if (s.left(2) == "0x") {sscanf(s.data(), "%x", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%o", &v); return v;}
sscanf(s.data(), "%d", &v);
return v;
}
long PIString::toLong() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
long v;
if (s.left(2) == "0x") {sscanf(s.data(), "%lx", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%lo", &v); return v;}
sscanf(s.data(), "%ld", &v);
return v;
}
llong PIString::toLLong() const {
PIString s(trimmed().toLowerCase().toNativeDecimalPoints());
llong v;
if (s.left(2) == "0x") {sscanf(s.data(), "%llx", &v); return v;}
if (s.left(1) == "0") {sscanf(s.data(), "%llo", &v); return v;}
sscanf(s.data(), "%lld", &v);
return v;
}
*/
/*! \brief Replace the content with a human-readable form of a byte count.
 * \details Values below 1024 are written as "<n> B". Larger values are divided
 * by 1024 repeatedly and written with one fractional digit and the matching
 * suffix: kB, MB, GB, TB, and finally PB for everything beyond. The fractional
 * digit is truncated, not rounded.
 * \param bytes size in bytes
 * \return reference to this string */
PIString & PIString::setReadableSize(llong bytes) {
	clear();
	if (bytes < 1024) {
		*this += (PIString::fromNumber(bytes) + " B");
		return *this;
	}
	static const char * const suffixes[] = {" kB", " MB", " GB", " TB", " PB"};
	const int last = 4; // index of the final suffix, used regardless of magnitude
	llong res = bytes;
	for (int unit = 0; ; ++unit) {
		double fres = res / 1024.;
		res /= 1024;
		fres -= res; // fractional part of the value in the current unit
		if (res < 1024 || unit == last) {
			*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + suffixes[unit]);
			return *this;
		}
	}
}
/// Convert an ASCII lowercase letter ('a'..'z') to uppercase; any other value is returned unchanged.
inline char chrUpr(char c) {
	const bool isLower = (c >= 'a') && (c <= 'z');
	return isLower ? static_cast<char>(c + 'A' - 'a') : c;
}
/// Convert an ASCII uppercase letter ('A'..'Z') to lowercase; any other value is returned unchanged.
inline char chrLwr(char c) {
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? static_cast<char>(c + 'a' - 'A') : c;
}
/*! \brief Remove repeated strings, keeping the first occurrence of each.
 * \details Every item is compared against the items already kept; an item that
 * was seen before is removed from the list in place. The order of the
 * remaining items is unchanged.
 * \return reference to this list */
PIStringList& PIStringList::removeDuplicates() {
	PIStringList seen;
	int idx = 0;
	while (idx < size_s()) {
		PIString cur;
		cur = at(idx);
		bool known = false;
		for (int k = 0; k < seen.size_s() && !known; ++k)
			known = !(cur != seen[k]);
		if (known) {
			// the next item slides into idx, so the index is not advanced
			remove(idx);
		} else {
			seen << cur;
			++idx;
		}
	}
	return *this;
}