/*
    PIP - Platform Independent Primitives
    String
    Copyright (C) 2013  Ivan Pelipenko peri4ko@gmail.com

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "pistring.h"

/*! \class PIString
 * \brief String class
 * \details PIP uses this class to store and process string information.
 *
 * \section PIString_sec0 Synopsis
 * This class is based on \a PIVector to store information.
 * A string is a sequence of \a PIChar and can contain multibyte
 * symbols. Therefore the real memory size of a string is symbols count * 4.
 * A string can be constructed from many types of data and can be converted
 * to many types. There are many operators and handy functions to use
 * a string as you wish.
 *
 * \section PIString_sec1 To/from data conversions
 * The most common constructor is \a PIString(const char * str), where "str"
 * is a null-terminated string, e.g. \c "string". This is 7 chars with last char = 0.
 * You can also construct a \a PIString from a single \a PIChar, a \a PIByteArray,
 * another \a PIString or a sequence of the same character with custom length.\n \n
 * This class has implicit conversions to \c const \c char \c * and
 * \c std::string. There are also functions that make the same conversions:
 * * \a data() - to \c const \c char \c *,
 * * \a stdString() - to \c std::string,
 * * \a toByteArray() - to \a PIByteArray.
 *
 * \section PIString_sec2 Numeric operations
 * You can get the symbolic representation of any numeric value with the function
 * \a setNumber(any integer value, int base = 10, bool * ok = 0). Default
 * arguments are set for the decimal base system, but you can choose any system
 * from 2 to 40. There are the same static functions \a fromNumber(), that
 * return a \a PIString. \n
 * There is also the function \a setReadableSize() which sets a human-readable
 * size in B, kB, MB, GB, TB or PB. Its static analog is \a readableSize().
 *
 */


// Digit symbols for bases up to 40: '0'-'9', 'A'-'Z', then '[', '\\', ']', '^'.
const char PIString::toBaseN[] = {
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
	'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
	'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^'};

// Reverse lookup: 8-bit character code -> digit value, -1 for "not a digit".
// 256 entries, 16 per row; lowercase letters map to the same values as uppercase.
const int PIString::fromBaseN[] = {
	/*   0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/*  16 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/*  32 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/*  48 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/*  64 */ -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
	/*  80 */ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, -1,
	/*  96 */ -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
	/* 112 */ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
	/* 128 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 144 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 160 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 176 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 192 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 208 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 224 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 240 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};


// Packs the first "count" bytes (at most 4) of "bytes" into an int, first byte
// in the lowest 8 bits. This is the layout data() and convertToStd() unpack.
static int piPackBytesLE(const char * bytes, int count) {
	uint v = 0;
	for (int k = 0; k < count; ++k)
		v |= uint(uchar(bytes[k])) << (8 * k);
	return int(v);
}


// True for the characters trim()/trimmed() strip: space, tab, LF, CR, form feed.
static bool piIsTrimSpace(PIChar c) {
	return !(c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != char(12));
}


/// Appends "s" bytes starting at "c", decoding multibyte sequences with
/// mbtowc() (current C locale). Each decoded sequence of 2..4 bytes becomes one
/// PIChar holding the raw bytes; bytes that do not start a valid sequence
/// are appended one by one.
void PIString::appendFromChars(const char * c, int s) {
	int sz;
	wchar_t wc;
	for (int i = 0; i < s; ++i) {
		// 7-bit ASCII fast path; tested on the unsigned value so it also works
		// on platforms where plain char is unsigned.
		if (uchar(c[i]) < 0x80) {
			push_back(PIChar(c[i]));
			continue;
		}
		// Never let mbtowc() look past the end of the caller's buffer.
		int avail = s - i;
		if (avail > 4) avail = 4;
		sz = mbtowc(&wc, &(c[i]), size_t(avail));
		switch (sz) {
		case 4:
		case 3:
			push_back(PIChar(piPackBytesLE(c + i, sz)));
			i += sz - 1;
			continue;
		case 2:
			push_back(PIChar(short(piPackBytesLE(c + i, 2))));
			++i;
			continue;
		default:
			push_back(PIChar(c[i]));
			break;
		}
	}
}


/// Appends a null-terminated C string. A null pointer appends nothing.
PIString & PIString::operator +=(const char * str) {
	if (str == 0) return *this;
	int l = 0;
	while (str[l] != '\0') ++l;
	appendFromChars(str, l);
	return *this;
}


/// Appends a null-terminated wide string, converting every character with
/// wctomb() (current C locale). Characters that cannot be converted are
/// appended as '?'. A null pointer appends nothing.
PIString & PIString::operator +=(const wchar_t * str) {
	if (str == 0) return *this;
	int l = 0, sz;
	while (str[l] != 0) ++l;
	// wctomb() writes at most MB_CUR_MAX bytes; the string owns the buffer so
	// it is released even if push_back() throws.
	string buf(size_t(MB_CUR_MAX), '\0');
	for (int i = 0; i < l; ++i) {
		sz = wctomb(&buf[0], str[i]);
		switch (sz) {
		case 4:
		case 3:
			push_back(PIChar(piPackBytesLE(buf.data(), sz)));
			continue;
		case 2:
			push_back(PIChar(short(piPackBytesLE(buf.data(), 2))));
			continue;
		default:
			// sz > 0: keep the first byte; sz <= 0: conversion failed
			push_back(PIChar(sz > 0 ? buf[0] : '?'));
			break;
		}
	}
	return *this;
}


#ifdef HAS_LOCALE
/// Appends every element of a std::wstring.
PIString & PIString::operator +=(const wstring & str) {
	const uint count = str.size();
	for (uint k = 0; k < count; ++k)
		push_back(str[k]);
	return *this;
}
#endif


/// Appends another PIString.
PIString & PIString::operator +=(const PIString & str) {
	const uint count = str.size();	// captured first, so appending to itself terminates
	for (uint k = 0; k < count; ++k)
		push_back(str[k]);
	return *this;
}


/// Returns true if both strings have the same length and equal symbols.
bool PIString::operator ==(const PIString & str) const {
	const uint l = str.size();
	if (size() != l) return false;
	for (uint i = 0; i < l; ++i)
		if (str[i] != at(i)) return false;
	return true;
}


/// Returns true if the strings differ in length or in any symbol.
bool PIString::operator !=(const PIString & str) const {
	return !(*this == str);
}


/// Ordering: a shorter string is always less than a longer one; strings of
/// equal length are ordered by their first differing symbol.
bool PIString::operator <(const PIString & str) const {
	const uint l = str.size();
	if (size() < l) return true;
	if (size() > l) return false;
	for (uint i = 0; i < l; ++i) {
		if (str[i] == at(i)) continue;
		return at(i) < str[i];
	}
	return false;
}


/// Mirror of operator <: a longer string is greater; equal-length strings are
/// ordered by their first differing symbol.
bool PIString::operator >(const PIString & str) const {
	const uint l = str.size();
	if (size() < l) return false;
	if (size() > l) return true;
	for (uint i = 0; i < l; ++i) {
		if (str[i] == at(i)) continue;
		return str[i] < at(i);
	}
	return false;
}


/// Returns "len" symbols starting at "start". A negative "len" means "up to
/// the end of the string". A negative "start" is clamped to 0 and shortens a
/// positive "len" by the part that lies before the string.
PIString PIString::mid(const int start, const int len) const {
	PIString str;
	int s = start, l = len;
	if (l == 0) return str;
	if (s < 0) {
		l += s;
		s = 0;
		// A positive-length window that ends before index 0 is empty; it must
		// not be mistaken for the "negative length = to the end" convention.
		if (len > 0 && l <= 0) return str;
	}
	if (l < 0) {
		for (uint i = s; i < size(); ++i)
			str += at(i);
	} else {
		if (l > length() - s) l = length() - s;
		for (int i = s; i < s + l; ++i)
			str += at(i);
	}
	return str;
}


/// Removes "len" symbols starting at "start"; arguments are interpreted as in
/// mid(). Does nothing if the range is empty or lies outside the string.
PIString & PIString::cutMid(const int start, const int len) {
	int s = start, l = len;
	if (l == 0) return *this;
	if (s < 0) {
		l += s;
		s = 0;
		if (len > 0 && l <= 0) return *this;
	}
	if (s >= length()) return *this;	// avoids unsigned underflow of size() - s
	if (l < 0)
		remove(s, size() - s);
	else {
		if (l > length() - s) l = length() - s;
		remove(s, l);
	}
	return *this;
}


/// Strips leading and trailing spaces, tabs, line feeds, carriage returns and
/// form feeds in place.
PIString & PIString::trim() {
	*this = trimmed();
	return *this;
}


/// Returns a copy without leading and trailing spaces, tabs, line feeds,
/// carriage returns and form feeds. A string made only of such characters
/// yields an empty string.
PIString PIString::trimmed() const {
	const int len = length();
	int st = 0;
	while (st < len && piIsTrimSpace(at(st))) ++st;
	if (st == len) return PIString();
	int fn = len - 1;
	while (fn > st && piIsTrimSpace(at(fn))) --fn;
	return mid(st, fn - st + 1);
}


/// Replaces "count" symbols starting at "from" with "with". "count" is
/// limited to the number of symbols available after "from".
PIString & PIString::replace(int from, int count, const PIString & with) {
	const int tail = length() - from;
	remove(from, count < tail ? count : tail);
	const uint c = with.length();
	for (uint i = 0; i < c; ++i)
		insert(from + i, with[i]);
	return *this;
}


/// Replaces the first occurrence of "what" with "with". If "ok" is not null it
/// receives true when a replacement was made and false otherwise (including
/// when "what" is empty).
PIString & PIString::replace(const PIString & what, const PIString & with, bool * ok) {
	if (what.isEmpty()) {
		if (ok != 0) *ok = false;
		return *this;
	}
	int s = find(what);
	if (s >= 0) replace(s, what.length(), with);
	if (ok != 0) *ok = (s >= 0);
	return *this;
}


/// Replaces every occurrence of "what" with "with". The search resumes after
/// the inserted text, so a replacement is never rescanned and the call
/// terminates even when "with" contains "what".
PIString & PIString::replaceAll(const PIString & what, const PIString & with) {
	if (what.isEmpty() || what == with) return *this;
	const int wl = what.length(), rl = with.length();
	int pos = find(what, 0);
	while (pos >= 0) {
		replace(pos, wl, with);
		pos = find(what, pos + rl);
	}
	return *this;
}


PIString & PIString::insert(int index, const PIString & str) { uint c = str.length(); for (uint i = 0; i < c; ++i) insert(index + i, str[i]); return *this; } PIStringList PIString::split(const PIString & delim) const { PIStringList sl; if (isEmpty() || delim.isEmpty()) return sl; PIString ts(*this); int ci = ts.find(delim); while (ci >= 0) { sl << ts.left(ci); ts.cutLeft(ci + delim.length()); ci = ts.find(delim); } if (ts.length() > 0) sl << ts; return sl; } int PIString::find(const char str, const int start) const { for (int i = start; i < length(); ++i) if (at(i) == str) return i; return -1; } int PIString::find(const PIString str, const int start) const { int l = str.length(); for (int i = start; i < length() - l + 1; ++i) if (mid(i, l) == str) return i; return -1; } int PIString::findLast(const char str, const int start) const { for (int i = length() - 1; i >= start; --i) if (at(i) == str) return i; return -1; } int PIString::findLast(const PIString str, const int start) const { int l = str.length(); for (int i = length() - l; i >= start; --i) if (mid(i, l) == str) return i; return -1; } PIString PIString::takeSymbol() { PIString ret; int sz = size_s(), ss = -1; for (int i = 0; i < sz; ++i) { PIChar c = at(i); if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; ss = i; break; } if (ss < 0) return ret; ret = mid(ss, 1); cutLeft(ss + 1); return ret; } PIString PIString::takeWord() { int sz = size_s(), ws = -1, we = -1; for (int i = 0; i < sz; ++i) { PIChar c = at(i); if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { if (we < 0 && ws >= 0) { we = i; break; } } else { if (ws < 0) ws = i; if (we >= 0) break; } } PIString ret = mid(ws, we - ws); cutLeft(we < 0 ? 
sz : we); return ret; } PIString PIString::takeCWord() { PIString ret; int sz = size_s(), ws = -1, we = -1; for (int i = 0; i < sz; ++i) { PIChar c = at(i); if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { if (we < 0 && ws >= 0) { we = i; break; } } else { if (ws < 0) { if (c.isAlpha() || c == '_') ws = i; else return ret; } else { if (!c.isAlpha() && !c.isDigit() && c != '_') { we = i; break; } } if (we >= 0) break; } } ret = mid(ws, we - ws); cutLeft(we < 0 ? sz : we); return ret; } PIString PIString::takeLine() { int sz = size_s(), le = -1; for (int i = 0; i < sz; ++i) { PIChar c = at(i); if (c == '\n') { le = i; break; } } PIString ret = left(le); if (!ret.isEmpty()) if (ret.back() == '\r') ret.cutRight(1); cutLeft(le < 0 ? sz : le + 1); return ret; } PIString PIString::takeNumber() { PIString ret; int sz = size_s(), ls = -1, le = -1, phase = 0; for (int i = 0; i < sz; ++i) { if (phase > 7) break; PIChar c = at(i); //piCout << "char " << c << "phase" << phase; switch (phase) { case 0: // trim if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; phase = 7; case 7: // sign if (c == '-' || c == '+') {ls = i; phase = 1; break;} case 1: // search start if (c >= '0' && c <= '9') {le = i; if (ls < 0) ls = i; phase = 2; break;} if (c == '.') {le = i; if (ls < 0) ls = i; phase = 3; break;} phase = 9; break; case 2: // integer if (c == '.') {le = i; phase = 3; break;} if (c == 'e' || c == 'E') {le = i; phase = 4; break;} if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == 'x') {le = i; break;} phase = 6; break; case 3: // point if (c == 'e' || c == 'E') {le = i; phase = 4; break;} if (c >= '0' && c <= '9') {le = i; break;} phase = 6; break; case 4: // exp if ((c >= '0' && c <= '9') || c == '-' || c == '+') {le = i; phase = 5; break;} phase = 6; break; case 5: // power if (c >= '0' && c <= '9') {le = i; break;} phase = 6; break; case 6: // suffix if (c == 'f' || c == 's' || c == 'u' || c == 'l' || c == 'L') {le = i; 
break;} phase = 9; break; } if (phase == 6) { if (c == 'f' || c == 's' || c == 'u' || c == 'l' || c == 'L') le = i; else phase = 9; } } //piCout << ls << le; if (le < ls) return ret; ret = mid(ls, le - ls + 1); cutLeft(le + 1); return ret; } PIString PIString::takeRange(const PIChar & start, const PIChar & end, const PIChar & shield) { PIString ret; bool trim_ = (start != ' ' && start != '\t' && start != '\n' && start != '\r'), eq = (start == end); int sz = size_s(), ls = -1, le = -1, cnt = 0; for (int i = 0; i < sz; ++i) { PIChar c = at(i); if (c == shield) {++i; continue;} if (trim_) { if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; trim_ = false; } if (eq) { if (c == start) { if (cnt == 0) ls = i; else {le = i; cnt = 0; break;} cnt++; } } else { if (c == start) { if (cnt == 0) ls = i; cnt++; } if (c == end) { cnt--; if (cnt == 0) le = i; } } if (cnt <= 0) break; } //piCout << ls << le << cnt; if (le < ls || ls < 0 || le < 0 || cnt != 0) return ret; ret = mid(ls + 1, le - ls - 1); cutLeft(le + 1); return ret; } PIString PIString::toUpperCase() const { PIString str(*this); int l = str.size(); for (int i = 0; i < l; ++i) str[i] = str[i].toUpper(); return str; } PIString PIString::toLowerCase() const { PIString str(*this); int l = str.size(); for (int i = 0; i < l; ++i) str[i] = str[i].toLower(); return str; } int PIString::lengthAscii() const { int j = 0; for (int i = 0; i < size_s(); ++i, ++j) if (!at(i).isAscii()) ++j; return j; } const char * PIString::data() const { data_.clear(); uint wc; uchar tc; //printf("PIString::data %d\n", size_s()); for (int i = 0, j = 0; i < size_s(); ++i) { wc = uint(at(i).toInt()); //printf("__%d_%d\n", i, wc); while (tc = wc & 0xFF, tc) { data_.push_back(uchar(tc)); ++j; wc >>= 8; //printf("____%d\n", wc); } /*if (at(i).isAscii()) data_.push_back(uchar(at(i).toAscii())); else { data_.push_back((at(i).toCharPtr()[0])); ++j; data_.push_back((at(i).toCharPtr()[1])); }*/ } data_.push_back(uchar('\0')); return (const char 
* )data_.data(); } string PIString::convertToStd() const { string s; uint wc; uchar tc; if (size() > 0) { for (int i = 0; i < length(); ++i) { wc = uint(at(i).toInt()); while (tc = wc & 0xFF, tc) { s.push_back(char(tc)); wc >>= 8; } /*if (at(i).isAscii()) s.push_back(at(i).toAscii()); else { s.push_back(at(i).toCharPtr()[0]); s.push_back(at(i).toCharPtr()[1]); }*/ } } return s; } char PIString::toChar() const { PIString s(toNativeDecimalPoints()); char v; sscanf(s.data(), "%c", &v); return v; } /* short PIString::toShort() const { PIString s(trimmed().toLowerCase().toNativeDecimalPoints()); short v; if (s.left(2) == "0x") {sscanf(s.data(), "%hx", &v); return v;} if (s.left(1) == "0") {sscanf(s.data(), "%ho", &v); return v;} sscanf(s.data(), "%hd", &v); return v; } int PIString::toInt() const { PIString s(trimmed().toLowerCase().toNativeDecimalPoints()); int v; if (s.left(2) == "0x") {sscanf(s.data(), "%x", &v); return v;} if (s.left(1) == "0") {sscanf(s.data(), "%o", &v); return v;} sscanf(s.data(), "%d", &v); return v; } long PIString::toLong() const { PIString s(trimmed().toLowerCase().toNativeDecimalPoints()); long v; if (s.left(2) == "0x") {sscanf(s.data(), "%lx", &v); return v;} if (s.left(1) == "0") {sscanf(s.data(), "%lo", &v); return v;} sscanf(s.data(), "%ld", &v); return v; } llong PIString::toLLong() const { PIString s(trimmed().toLowerCase().toNativeDecimalPoints()); llong v; if (s.left(2) == "0x") {sscanf(s.data(), "%llx", &v); return v;} if (s.left(1) == "0") {sscanf(s.data(), "%llo", &v); return v;} sscanf(s.data(), "%lld", &v); return v; } */ PIString & PIString::setReadableSize(llong bytes) { clear(); if (bytes < 1024) {*this += (PIString::fromNumber(bytes) + " B"); return *this;} double fres = bytes / 1024.; llong res = bytes / 1024; fres -= res; if (res < 1024) {*this += (PIString::fromNumber(res) + "." 
+ PIString::fromNumber(llong(fres * 10)).left(1) + " kB"); return *this;} fres = res / 1024.; res /= 1024; fres -= res; if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " MB"); return *this;} fres = res / 1024.; res /= 1024; fres -= res; if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " GB"); return *this;} fres = res / 1024.; res /= 1024; fres -= res; if (res < 1024) {*this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " TB"); return *this;} fres = res / 1024.; res /= 1024; fres -= res; *this += (PIString::fromNumber(res) + "." + PIString::fromNumber(llong(fres * 10)).left(1) + " PB"); return *this; } inline char chrUpr(char c) { if (c >= 'a' && c <= 'z') return c + 'A' - 'a'; //if (c >= 'а' && c <= 'я') return c + 'А' - 'а'; return c; } inline char chrLwr(char c) { if (c >= 'A' && c <= 'Z') return c + 'a' - 'A'; //if (c >= 'А' && c <= 'Я') return c + 'а' - 'А'; return c; } PIStringList& PIStringList::removeDuplicates() { PIStringList l; PIString s; bool ae; for (uint i = 0; i < size(); ++i) { ae = false; s = at(i); for (uint j = 0; j < l.size(); ++j) { if (s != l[j]) continue; ae = true; break; } if (!ae) { l << s; continue; } remove(i); --i; } return *this; }