arsa  2.7
irrUString.h
Go to the documentation of this file.
1 /*
2  Basic Unicode string class for Irrlicht.
3  Copyright (c) 2009-2011 John Norman
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any
7  damages arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any
10  purpose, including commercial applications, and to alter it and
11  redistribute it freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you
14  must not claim that you wrote the original software. If you use
15  this software in a product, an acknowledgment in the product
16  documentation would be appreciated but is not required.
17 
18  2. Altered source versions must be plainly marked as such, and
19  must not be misrepresented as being the original software.
20 
21  3. This notice may not be removed or altered from any source
22  distribution.
23 
24  The original version of this class can be located at:
25  http://irrlicht.suckerfreegames.com/
26 
27  John Norman
28  john@suckerfreegames.com
29 */
30 
31 #ifndef __IRR_USTRING_H_INCLUDED__
32 #define __IRR_USTRING_H_INCLUDED__
33 
34 #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
35 # define USTRING_CPP0X
36 # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
37 # define USTRING_CPP0X_NEWLITERALS
38 # endif
39 #endif
40 
41 #undef USTRING_CPP0X
42 #define USTRING_NO_STL
43 
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 
48 #ifdef USTRING_CPP0X
49 # include <utility>
50 #endif
51 
52 #ifndef USTRING_NO_STL
53 # include <string>
54 # include <iterator>
55 # include <ostream>
56 # include <functional>
57 #endif
58 
59 #include "irrTypes.h"
60 #include "irrAllocator.h"
61 #include "irrArray.h"
62 #include "irrMath.h"
63 #include "irrString.h"
64 #include "path.h"
65 
67 static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
68 static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
69 
71 #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
72 #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
73 #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
74 
75 
76 namespace irr
77 {
78 
79  // Define our character types.
80 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
81  typedef char32_t uchar32_t;
82  typedef char16_t uchar16_t;
83  typedef char uchar8_t;
84 #else
85  typedef u32 uchar32_t;
86  typedef u16 uchar16_t;
87  typedef u8 uchar8_t;
88 #endif
89 
90 namespace core
91 {
92 
93 namespace unicode
94 {
95 
98 
104 {
105  // Convert the surrogate pair into a single UTF-32 character.
     // x holds the low 16 bits of the result: the low 6 bits of `high`
     // shifted up by 10, combined with the low 10 bits of `low`.
106  uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
     // wu is bits 6..10 of `high` plus one; it ends up above bit 15.
107  uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
108  return (wu << 16) | x;
109 }
110 
114 {
115  return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
116 }
117 
121 {
122  return ((c >> 24) & 0x000000FF) |
123  ((c >> 8) & 0x0000FF00) |
124  ((c << 8) & 0x00FF0000) |
125  ((c << 24) & 0xFF000000);
126 }
127 
129 const u16 BOM = 0xFEFF;
130 
132 const u8 BOM_UTF8_LEN = 3;
133 const u8 BOM_UTF16_LEN = 1;
134 const u8 BOM_UTF32_LEN = 1;
135 
137 const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
138 const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
139 const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
140 const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
141 const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
142 
147 
150 {
159 };
160 
163 {
167 };
168 
171 
175 {
176 #define COPY_ARRAY(source, size) \
177  memcpy(ret.pointer(), source, size); \
178  ret.set_used(size)
179 
180  core::array<u8> ret(4);
181  switch (mode)
182  {
183  case EUTFE_UTF8:
185  break;
186  case EUTFE_UTF16:
187  #ifdef __BIG_ENDIAN__
189  #else
191  #endif
192  break;
193  case EUTFE_UTF16_BE:
195  break;
196  case EUTFE_UTF16_LE:
198  break;
199  case EUTFE_UTF32:
200  #ifdef __BIG_ENDIAN__
202  #else
204  #endif
205  break;
206  case EUTFE_UTF32_BE:
208  break;
209  case EUTFE_UTF32_LE:
211  break;
212  }
213  return ret;
214 
215 #undef COPY_ARRAY
216 }
217 
222 {
     // Compare the start of `data` against each known byte order mark.
     // The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 marks:
     // the UTF-32 LE mark (FF FE 00 00) begins with the UTF-16 LE mark
     // (FF FE), so testing UTF-16 first made the UTF-32 LE result unreachable.
     // As before, up to 4 bytes of `data` are read when nothing matches early.
223  if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
224  if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
225  if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
226  if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
227  if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
     // No recognised mark at the start of the buffer.
228  return EUTFE_NONE;
229 }
230 
231 } // end namespace unicode
232 
233 
235 template <typename TAlloc = irrAllocator<uchar16_t> >
237 {
238 public:
239 
243 
246  {
247  public:
248  _ustring16_iterator_access(const ustring16<TAlloc>* s, size_t p) : ref(s), pos(p) {}
249 
251  operator uchar32_t() const
252  {
253  return _get();
254  }
255 
260  {
261  _set(c);
262  return *this;
263  }
264 
268  {
269  _set(_get() + 1);
270  return *this;
271  }
272 
276  {
277  uchar32_t old = _get();
278  _set(old + 1);
279  return old;
280  }
281 
285  {
286  _set(_get() - 1);
287  return *this;
288  }
289 
293  {
294  uchar32_t old = _get();
295  _set(old - 1);
296  return old;
297  }
298 
303  {
304  _set(_get() + val);
305  return *this;
306  }
307 
312  {
313  _set(_get() - val);
314  return *this;
315  }
316 
321  {
322  _set(_get() * val);
323  return *this;
324  }
325 
330  {
331  _set(_get() / val);
332  return *this;
333  }
334 
339  {
340  _set(_get() % val);
341  return *this;
342  }
343 
348  {
349  return _get() + val;
350  }
351 
356  {
357  return _get() - val;
358  }
359 
364  {
365  return _get() * val;
366  }
367 
372  {
373  return _get() / val;
374  }
375 
380  {
381  return _get() % val;
382  }
383 
384  private:
//! Decodes the character stored at `pos` in the referenced string.
/** A unit outside the surrogate range is returned unchanged. Any surrogate
unit (high or low, per UTF16_IS_SURROGATE) is combined with the unit that
follows it through unicode::toUTF32().
\return The decoded value, or 0 when a surrogate sits in the last raw slot. */
386  uchar32_t _get() const
387  {
388  const uchar16_t* a = ref->c_str();
389  if (!UTF16_IS_SURROGATE(a[pos]))
390  return static_cast<uchar32_t>(a[pos]);
391  else
392  {
      // A surrogate needs a second unit; without one there is nothing to decode.
393  if (pos + 1 >= ref->size_raw())
394  return 0;
395 
396  return unicode::toUTF32(a[pos], a[pos + 1]);
397  }
398  }
399 
//! Stores character `c` at `pos`, growing or shrinking the string as needed.
/** The referenced string is reached through a const_cast of `ref`, so this
modifies the string even though the proxy holds a const pointer.
\param c Value to store; values above 0xFFFF are written as a surrogate pair. */
401  void _set(uchar32_t c)
402  {
403  ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
404  const uchar16_t* a = ref2->c_str();
405  if (c > 0xFFFF)
406  {
407  // c needs two 16-bit units, so split it into a high and a low surrogate.
408  uchar16_t x = static_cast<uchar16_t>(c);
409  uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
410  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
411 
412  // If a pair already occupies this position, overwrite its second unit. Otherwise insert room for the low surrogate.
413  if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
414  ref2->replace_raw(vl, pos + 1);
415  else ref2->insert_raw(vl, pos + 1);
416 
417  ref2->replace_raw(vh, pos);
418  }
419  else
420  {
421  // c fits in a single 16-bit unit.
422  uchar16_t vh = static_cast<uchar16_t>(c);
423 
424  // If the position currently starts a surrogate pair, remove the now-unneeded second unit.
425  if (UTF16_IS_SURROGATE_HI(a[pos]))
426  ref2->erase_raw(pos + 1);
427 
428  ref2->replace_raw(vh, pos);
429  }
430  }
431 
432  const ustring16<TAlloc>* ref;
433  size_t pos;
434  };
436 
437 
439 #ifndef USTRING_NO_STL
440  class _ustring16_const_iterator : public std::iterator<
441  std::bidirectional_iterator_tag, // iterator_category
442  access, // value_type
443  ptrdiff_t, // difference_type
444  const access, // pointer
445  const access // reference
446  >
447 #else
449 #endif
450  {
451  public:
453  //typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
454  typedef const access const_pointer;
455  typedef const access const_reference;
456 
457 #ifndef USTRING_NO_STL
458  typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
459  typedef typename _Base::value_type value_type;
460  typedef typename _Base::difference_type difference_type;
461  typedef typename _Base::difference_type distance_type;
462  typedef typename _Base::pointer pointer;
463  typedef const_reference reference;
464 #else
466  typedef size_t difference_type;
467  typedef size_t distance_type;
470 #endif
471 
475  _ustring16_const_iterator(const ustring16<TAlloc>& s, const size_t p) : ref(&s), pos(0)
476  {
477  if (ref->size_raw() == 0 || p == 0)
478  return;
479 
480  // Go to the appropriate position.
481  size_t i = p;
482  size_t sr = ref->size_raw();
483  const uchar16_t* a = ref->c_str();
484  while (i != 0 && pos < sr)
485  {
487  pos += 2;
488  else ++pos;
489  --i;
490  }
491  }
492 
494  bool operator==(const _Iter& iter) const
495  {
496  if (ref == iter.ref && pos == iter.pos)
497  return true;
498  return false;
499  }
500 
502  bool operator!=(const _Iter& iter) const
503  {
504  if (ref != iter.ref || pos != iter.pos)
505  return true;
506  return false;
507  }
508 
511  { // ++iterator
512  if (pos == ref->size_raw()) return *this;
513  const uchar16_t* a = ref->c_str();
515  pos += 2; // TODO: check for valid low surrogate?
516  else ++pos;
517  if (pos > ref->size_raw()) pos = ref->size_raw();
518  return *this;
519  }
520 
523  { // iterator++
524  _Iter _tmp(*this);
525  ++*this;
526  return _tmp;
527  }
528 
531  { // --iterator
532  if (pos == 0) return *this;
533  const uchar16_t* a = ref->c_str();
534  --pos;
535  if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
536  --pos;
537  return *this;
538  }
539 
542  { // iterator--
543  _Iter _tmp(*this);
544  --*this;
545  return _tmp;
546  }
547 
551  {
552  if (v == 0) return *this;
553  if (v < 0) return operator-=(v * -1);
554 
555  if (pos >= ref->size_raw())
556  return *this;
557 
558  // Go to the appropriate position.
559  difference_type i = v;
560  size_t sr = ref->size_raw();
561  const uchar16_t* a = ref->c_str();
562  while (i != 0 && pos < sr)
563  {
565  pos += 2;
566  else ++pos;
567  --i;
568  }
569  if (pos > sr)
570  pos = sr;
571 
572  return *this;
573  }
574 
578  {
      // Moves the iterator back by `v` characters, stopping at position 0.
579  if (v == 0) return *this;
      // A negative step means "move forward". The test used to be `v > 0`,
      // which forwarded every positive step to operator+= with a negated
      // count instead of walking backwards.
580  if (v < 0) return operator+=(v * -1);
581 
582  if (pos == 0)
583  return *this;
584 
585  // Go to the appropriate position.
586  difference_type i = v;
587  const uchar16_t* a = ref->c_str();
588  while (i != 0 && pos != 0)
589  {
590  --pos;
      // Landed on the second half of a pair: step once more to its start.
591  if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
592  --pos;
593  --i;
594  }
595 
596  return *this;
597  }
598 
601  {
602  _Iter ret(*this);
603  ret += v;
604  return ret;
605  }
606 
609  {
610  _Iter ret(*this);
611  ret -= v;
612  return ret;
613  }
614 
//! Returns how many iterator steps separate `iter` from this iterator.
/** Iterators that reference different strings give a value-initialised result.
\param iter Iterator to measure from.
\return Steps counted upwards when this iterator is ahead of `iter`, and
counted downwards from zero when it is behind. */
616  difference_type operator-(const _Iter& iter) const
617  {
618  // Make sure we reference the same object!
619  if (ref != iter.ref)
620  return difference_type();
621 
622  _Iter i = iter;
      // Start counting from zero; the counter used to be left uninitialised.
623  difference_type ret = difference_type();
624 
625  // Walk up.
626  if (pos > i.pos)
627  {
628  while (pos > i.pos)
629  {
630  ++i;
631  ++ret;
632  }
633  return ret;
634  }
635 
636  // Walk down.
      // NOTE(review): if difference_type is unsigned this count wraps around - confirm intent.
637  while (pos < i.pos)
638  {
639  --i;
640  --ret;
641  }
642  return ret;
643  }
644 
647  {
648  if (pos >= ref->size_raw())
649  {
650  const uchar16_t* a = ref->c_str();
651  size_t p = ref->size_raw();
652  if (UTF16_IS_SURROGATE_LO(a[p]))
653  --p;
654  reference ret(ref, p);
655  return ret;
656  }
657  const_reference ret(ref, pos);
658  return ret;
659  }
660 
663  {
664  if (pos >= ref->size_raw())
665  {
666  const uchar16_t* a = ref->c_str();
667  size_t p = ref->size_raw();
668  if (UTF16_IS_SURROGATE_LO(a[p]))
669  --p;
670  reference ret(ref, p);
671  return ret;
672  }
673  reference ret(ref, pos);
674  return ret;
675  }
676 
679  {
680  return operator*();
681  }
682 
685  {
686  return operator*();
687  }
688 
690  bool atStart() const
691  {
692  return pos == 0;
693  }
694 
//! Reports whether the iterator has no further character to read.
/** When the unit at `pos` is a surrogate, the iterator also counts as being at
the end if no second unit follows it inside the raw data. */
696  bool atEnd() const
697  {
698  const uchar16_t* a = ref->c_str();
699  if (UTF16_IS_SURROGATE(a[pos]))
700  return (pos + 1) >= ref->size_raw();
701  else return pos >= ref->size_raw();
702  }
703 
705  void toStart()
706  {
707  pos = 0;
708  }
709 
//! Moves the iterator to one past the last raw unit of the string.
711  void toEnd()
712  {
      // The position is taken straight from the raw size; the string data is not inspected.
714  pos = ref->size_raw();
715  }
716 
719  size_t getPos() const
720  {
721  return pos;
722  }
723 
724  protected:
726  size_t pos;
727  };
728 
731  {
732  public:
737 
738  typedef typename _Base::value_type value_type;
741  typedef access pointer;
742  typedef access reference;
743 
744  using _Base::pos;
745  using _Base::ref;
746 
751 
754  {
755  if (pos >= ref->size_raw())
756  {
757  const uchar16_t* a = ref->c_str();
758  size_t p = ref->size_raw();
759  if (UTF16_IS_SURROGATE_LO(a[p]))
760  --p;
761  reference ret(ref, p);
762  return ret;
763  }
764  reference ret(ref, pos);
765  return ret;
766  }
767 
770  {
771  if (pos >= ref->size_raw())
772  {
773  const uchar16_t* a = ref->c_str();
774  size_t p = ref->size_raw();
775  if (UTF16_IS_SURROGATE_LO(a[p]))
776  --p;
777  reference ret(ref, p);
778  return ret;
779  }
780  reference ret(ref, pos);
781  return ret;
782  }
783 
786  {
787  return operator*();
788  }
789 
792  {
793  return operator*();
794  }
795  };
796 
799 
803 
804 
808  static const size_t npos = -1;
809 
810 
813  : array(0), allocated(1), used(0)
814  {
815 #if __BIG_ENDIAN__
816  encoding = unicode::EUTFE_UTF16_BE;
817 #else
818  encoding = unicode::EUTFE_UTF16_LE;
819 #endif
820  array = allocator.allocate(1); // new u16[1];
821  array[0] = 0x0;
822  }
823 
824 
827  : array(0), allocated(0), used(0)
828  {
829 #if __BIG_ENDIAN__
830  encoding = unicode::EUTFE_UTF16_BE;
831 #else
832  encoding = unicode::EUTFE_UTF16_LE;
833 #endif
834  *this = other;
835  }
836 
837 
839  template <class B, class A>
840  ustring16(const string<B, A>& other)
841  : array(0), allocated(0), used(0)
842  {
843 #if __BIG_ENDIAN__
844  encoding = unicode::EUTFE_UTF16_BE;
845 #else
846  encoding = unicode::EUTFE_UTF16_LE;
847 #endif
848  *this = other;
849  }
850 
851 
852 #ifndef USTRING_NO_STL
853  template <class B, class A, typename Alloc>
855  ustring16(const std::basic_string<B, A, Alloc>& other)
856  : array(0), allocated(0), used(0)
857  {
858 #if __BIG_ENDIAN__
859  encoding = unicode::EUTFE_UTF16_BE;
860 #else
861  encoding = unicode::EUTFE_UTF16_LE;
862 #endif
863  *this = other.c_str();
864  }
865 
866 
868  template <typename Itr>
869  ustring16(Itr first, Itr last)
870  : array(0), allocated(0), used(0)
871  {
872 #if __BIG_ENDIAN__
873  encoding = unicode::EUTFE_UTF16_BE;
874 #else
875  encoding = unicode::EUTFE_UTF16_LE;
876 #endif
877  reserve(std::distance(first, last));
878  array[used] = 0;
879 
880  for (; first != last; ++first)
882  }
883 #endif
884 
885 
886 #ifndef USTRING_CPP0X_NEWLITERALS
//! Constructs the string from a null-terminated char buffer.
/** The buffer is handed to loadDataStream() together with its strlen() length.
A null pointer leaves the members as initialised (no array allocated), which
matches the early return that append() performs on a null pointer.
\param c Source characters, or null. */
887  ustring16(const char* const c)
889  : array(0), allocated(0), used(0)
890  {
891 #if __BIG_ENDIAN__
892  encoding = unicode::EUTFE_UTF16_BE;
893 #else
894  encoding = unicode::EUTFE_UTF16_LE;
895 #endif
896 
      // strlen() must not be called on a null pointer, so skip loading in that case.
897  if (c) loadDataStream(c, strlen(c));
898  //append((uchar8_t*)c);
899  }
900 
901 
903  ustring16(const char* const c, size_t length)
904  : array(0), allocated(0), used(0)
905  {
906 #if __BIG_ENDIAN__
907  encoding = unicode::EUTFE_UTF16_BE;
908 #else
909  encoding = unicode::EUTFE_UTF16_LE;
910 #endif
911 
913  }
914 #endif
915 
916 
918  ustring16(const uchar8_t* const c)
919  : array(0), allocated(0), used(0)
920  {
921 #if __BIG_ENDIAN__
922  encoding = unicode::EUTFE_UTF16_BE;
923 #else
924  encoding = unicode::EUTFE_UTF16_LE;
925 #endif
926 
927  append(c);
928  }
929 
930 
//! Constructs a one-character string from a single char.
/** \param c Character to store. Its value is taken as an unsigned byte
(0..255), so it is never sign-extended. */
932  ustring16(const char c)
933  : array(0), allocated(0), used(0)
934  {
935 #if __BIG_ENDIAN__
936  encoding = unicode::EUTFE_UTF16_BE;
937 #else
938  encoding = unicode::EUTFE_UTF16_LE;
939 #endif
940 
      // Go through unsigned char first. Where plain char is signed, a negative
      // value would otherwise widen to a number above 0xFFFF and be appended
      // as a meaningless surrogate pair.
941  append(static_cast<uchar32_t>(static_cast<unsigned char>(c)));
942  }
943 
944 
946  ustring16(const uchar8_t* const c, size_t length)
947  : array(0), allocated(0), used(0)
948  {
949 #if __BIG_ENDIAN__
950  encoding = unicode::EUTFE_UTF16_BE;
951 #else
952  encoding = unicode::EUTFE_UTF16_LE;
953 #endif
954 
955  append(c, length);
956  }
957 
958 
960  ustring16(const uchar16_t* const c)
961  : array(0), allocated(0), used(0)
962  {
963 #if __BIG_ENDIAN__
964  encoding = unicode::EUTFE_UTF16_BE;
965 #else
966  encoding = unicode::EUTFE_UTF16_LE;
967 #endif
968 
969  append(c);
970  }
971 
972 
974  ustring16(const uchar16_t* const c, size_t length)
975  : array(0), allocated(0), used(0)
976  {
977 #if __BIG_ENDIAN__
978  encoding = unicode::EUTFE_UTF16_BE;
979 #else
980  encoding = unicode::EUTFE_UTF16_LE;
981 #endif
982 
983  append(c, length);
984  }
985 
986 
988  ustring16(const uchar32_t* const c)
989  : array(0), allocated(0), used(0)
990  {
991 #if __BIG_ENDIAN__
992  encoding = unicode::EUTFE_UTF16_BE;
993 #else
994  encoding = unicode::EUTFE_UTF16_LE;
995 #endif
996 
997  append(c);
998  }
999 
1000 
1002  ustring16(const uchar32_t* const c, size_t length)
1003  : array(0), allocated(0), used(0)
1004  {
1005 #if __BIG_ENDIAN__
1006  encoding = unicode::EUTFE_UTF16_BE;
1007 #else
1008  encoding = unicode::EUTFE_UTF16_LE;
1009 #endif
1010 
1011  append(c, length);
1012  }
1013 
1014 
//! Constructs the string from a null-terminated wchar_t buffer.
/** The buffer is reinterpreted according to sizeof(wchar_t) and passed to the
append() overload for that width; any other width appends nothing.
\param c Source characters. */
1016  ustring16(const wchar_t* const c)
1017  : array(0), allocated(0), used(0)
1018  {
1019 #if __BIG_ENDIAN__
1020  encoding = unicode::EUTFE_UTF16_BE;
1021 #else
1022  encoding = unicode::EUTFE_UTF16_LE;
1023 #endif
1024 
       // 4-byte wchar_t is treated as UTF-32, 2-byte as UTF-16, 1-byte as UTF-8 input.
1025  if (sizeof(wchar_t) == 4)
1026  append(reinterpret_cast<const uchar32_t* const>(c));
1027  else if (sizeof(wchar_t) == 2)
1028  append(reinterpret_cast<const uchar16_t* const>(c));
1029  else if (sizeof(wchar_t) == 1)
1030  append(reinterpret_cast<const uchar8_t* const>(c));
1031  }
1032 
1033 
1035  ustring16(const wchar_t* const c, size_t length)
1036  : array(0), allocated(0), used(0)
1037  {
1038 #if __BIG_ENDIAN__
1039  encoding = unicode::EUTFE_UTF16_BE;
1040 #else
1041  encoding = unicode::EUTFE_UTF16_LE;
1042 #endif
1043 
1044  if (sizeof(wchar_t) == 4)
1045  append(reinterpret_cast<const uchar32_t* const>(c), length);
1046  else if (sizeof(wchar_t) == 2)
1047  append(reinterpret_cast<const uchar16_t* const>(c), length);
1048  else if (sizeof(wchar_t) == 1)
1049  append(reinterpret_cast<const uchar8_t* const>(c), length);
1050  }
1051 
1052 
1053 #ifdef USTRING_CPP0X
1054  ustring16(ustring16<TAlloc>&& other)
1056  : array(other.array), encoding(other.encoding), allocated(other.allocated), used(other.used)
1057  {
1058  //std::cout << "MOVE constructor" << std::endl;
1059  other.array = 0;
1060  other.allocated = 0;
1061  other.used = 0;
1062  }
1063 #endif
1064 
1065 
1068  {
1069  allocator.deallocate(array); // delete [] array;
1070  }
1071 
1072 
1075  {
1076  if (this == &other)
1077  return *this;
1078 
1079  used = other.size_raw();
1080  if (used >= allocated)
1081  {
1082  allocator.deallocate(array); // delete [] array;
1083  allocated = used + 1;
1084  array = allocator.allocate(used + 1); //new u16[used];
1085  }
1086 
1087  const uchar16_t* p = other.c_str();
1088  for (size_t i=0; i<=used; ++i, ++p)
1089  array[i] = *p;
1090 
1091  array[used] = 0;
1092 
1093  // Validate our new UTF-16 string.
1094  validate();
1095 
1096  return *this;
1097  }
1098 
1099 
1100 #ifdef USTRING_CPP0X
1103  {
1104  if (this != &other)
1105  {
1106  //std::cout << "MOVE operator=" << std::endl;
1107  allocator.deallocate(array);
1108 
1109  array = other.array;
1110  allocated = other.allocated;
1111  encoding = other.encoding;
1112  used = other.used;
1113  other.array = 0;
1114  other.used = 0;
1115  }
1116  return *this;
1117  }
1118 #endif
1119 
1120 
1122  template <class B, class A>
1124  {
1125  *this = other.c_str();
1126  return *this;
1127  }
1128 
1129 
1132  {
1133  if (!array)
1134  {
1135  array = allocator.allocate(1); //new u16[1];
1136  allocated = 1;
1137  }
1138  used = 0;
1139  array[used] = 0x0;
1140  if (!c) return *this;
1141 
1143  append(c);
1144  return *this;
1145  }
1146 
1147 
1150  {
1151  if (!array)
1152  {
1153  array = allocator.allocate(1); //new u16[1];
1154  allocated = 1;
1155  }
1156  used = 0;
1157  array[used] = 0x0;
1158  if (!c) return *this;
1159 
1161  append(c);
1162  return *this;
1163  }
1164 
1165 
1168  {
1169  if (!array)
1170  {
1171  array = allocator.allocate(1); //new u16[1];
1172  allocated = 1;
1173  }
1174  used = 0;
1175  array[used] = 0x0;
1176  if (!c) return *this;
1177 
1179  append(c);
1180  return *this;
1181  }
1182 
1183 
1185 
1188  ustring16<TAlloc>& operator=(const wchar_t* const c)
1189  {
1190  if (sizeof(wchar_t) == 4)
1191  *this = reinterpret_cast<const uchar32_t* const>(c);
1192  else if (sizeof(wchar_t) == 2)
1193  *this = reinterpret_cast<const uchar16_t* const>(c);
1194  else if (sizeof(wchar_t) == 1)
1195  *this = reinterpret_cast<const uchar8_t* const>(c);
1196 
1197  return *this;
1198  }
1199 
1200 
1202 
1203  template <class B>
1204  ustring16<TAlloc>& operator=(const B* const c)
1205  {
1206  if (sizeof(B) == 4)
1207  *this = reinterpret_cast<const uchar32_t* const>(c);
1208  else if (sizeof(B) == 2)
1209  *this = reinterpret_cast<const uchar16_t* const>(c);
1210  else if (sizeof(B) == 1)
1211  *this = reinterpret_cast<const uchar8_t* const>(c);
1212 
1213  return *this;
1214  }
1215 
1216 
//! Gives writable access to the character at a character index.
/** \param index Character index; a debug break fires when it is not below size().
\return The access proxy produced by dereferencing an iterator built for that index. */
1218  access operator [](const size_t index)
1219  {
1220  _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1221  iterator iter(*this, index);
1222  return iter.operator*();
1223  }
1224 
1225 
1227  const access operator [](const size_t index) const
1228  {
1229  _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
1230  const_iterator iter(*this, index);
1231  return iter.operator*();
1232  }
1233 
1234 
//! Compares this string with a null-terminated UTF-16 buffer, unit by unit.
/** \param str Buffer to compare against; a null pointer never compares equal.
\return true when every unit matches and both sequences end at the same index. */
1236  bool operator ==(const uchar16_t* const str) const
1237  {
1238  if (!str)
1239  return false;
1240 
1241  size_t i;
1242  for (i=0; array[i] && str[i]; ++i)
1243  if (array[i] != str[i])
1244  return false;
1245 
       // The loop stopped at a terminator; equal only if both ended together.
1246  return !array[i] && !str[i];
1247  }
1248 
1249 
//! Compares this string with another ustring16, unit by unit.
/** \param other String to compare against.
\return false on the first differing unit; otherwise whether both strings
have the same `used` count. */
1251  bool operator ==(const ustring16<TAlloc>& other) const
1252  {
1253  for (size_t i=0; array[i] && other.array[i]; ++i)
1254  if (array[i] != other.array[i])
1255  return false;
1256 
1257  return used == other.used;
1258  }
1259 
1260 
//! Orders two strings by their raw 16-bit units.
/** \param other String to compare against.
\return The comparison of the first differing unit; when one string runs out
first, whether this string has the smaller `used` count. */
1262  bool operator <(const ustring16<TAlloc>& other) const
1263  {
1264  for (size_t i=0; array[i] && other.array[i]; ++i)
1265  {
1266  if (array[i] == other.array[i])
1267  continue;
1268  return array[i] < other.array[i];
1269  }
1270 
1271  return used < other.used;
1272  }
1273 
1274 
1276  bool operator !=(const uchar16_t* const str) const
1277  {
1278  return !(*this == str);
1279  }
1280 
1281 
1283  bool operator !=(const ustring16<TAlloc>& other) const
1284  {
1285  return !(*this == other);
1286  }
1287 
1288 
//! Counts the characters in the string by walking it with an iterator.
/** Each call traverses the string from the start, so the cost grows with its
length; size_raw() is what the rest of the class uses for the raw unit count.
\return Number of iterator steps needed to reach the end. */
1291  size_t size() const
1292  {
1293  const_iterator i(*this, 0);
1294  size_t pos = 0;
1295  while (!i.atEnd())
1296  {
1297  ++i;
1298  ++pos;
1299  }
1300  return pos;
1301  }
1302 
1303 
1306  bool empty() const
1307  {
1308  return (size_raw() == 0);
1309  }
1310 
1311 
1314  const uchar16_t* c_str() const
1315  {
1316  return array;
1317  }
1318 
1319 
//! Compares at most the first n raw units of this string and another.
/** \param other String to compare against.
\param n Maximum number of 16-bit units to compare.
\return false on the first differing unit; otherwise true when n units
matched or when both strings have the same `used` count. */
1324  bool equalsn(const ustring16<TAlloc>& other, size_t n) const
1325  {
1326  size_t i;
1327  const uchar16_t* oa = other.c_str();
1328  for(i=0; array[i] && oa[i] && i < n; ++i)
1329  if (array[i] != oa[i])
1330  return false;
1331 
1332  // If one (or both) of the strings ended before n units were compared,
1333  // they are equal only if they have the same length.
1334  return (i == n) || (used == other.used);
1335  }
1336 
1337 
//! Compares at most the first n raw units of this string and a UTF-16 buffer.
/** \param str Null-terminated buffer to compare against; null never matches.
\param n Maximum number of 16-bit units to compare.
\return false on the first differing unit; otherwise true when n units
matched or when both sequences ended at the same index. */
1342  bool equalsn(const uchar16_t* const str, size_t n) const
1343  {
1344  if (!str)
1345  return false;
1346  size_t i;
1347  for(i=0; array[i] && str[i] && i < n; ++i)
1348  if (array[i] != str[i])
1349  return false;
1350 
1351  // If one (or both) of the strings ended before n units were compared,
1352  // they are equal only if they have the same length.
1353  return (i == n) || (array[i] == 0 && str[i] == 0);
1354  }
1355 
1356 
1361  {
1362  if (used + 2 >= allocated)
1363  reallocate(used + 2);
1364 
1365  if (character > 0xFFFF)
1366  {
1367  used += 2;
1368 
1369  // character will be multibyte, so split it up into a surrogate pair.
1370  uchar16_t x = static_cast<uchar16_t>(character);
1371  uchar16_t vh = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1372  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1373  array[used-2] = vh;
1374  array[used-1] = vl;
1375  }
1376  else
1377  {
1378  ++used;
1379  array[used-1] = character;
1380  }
1381  array[used] = 0;
1382 
1383  return *this;
1384  }
1385 
1386 
1391  ustring16<TAlloc>& append(const uchar8_t* const other, size_t length=0xffffffff)
1392  {
1393  if (!other)
1394  return *this;
1395 
1396  // Determine if the string is long enough for a BOM.
1397  size_t len = 0;
1398  const uchar8_t* p = other;
1399  do
1400  {
1401  ++len;
1402  } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
1403 
1404  // Check for BOM.
1407  {
1408  if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
1409  c_bom = unicode::EUTFE_UTF8;
1410  }
1411 
1412  // If a BOM was found, don't include it in the string.
1413  const uchar8_t* c2 = other;
1414  if (c_bom != unicode::EUTFE_NONE)
1415  {
1416  c2 = other + unicode::BOM_UTF8_LEN;
1418  }
1419 
1420  // Calculate the size of the string to read in.
1421  len = 0;
1422  p = c2;
1423  do
1424  {
1425  ++len;
1426  } while(*p++ && len < length);
1427  if (len > length)
1428  len = length;
1429 
1430  // If we need to grow the array, do it now.
1431  if (used + len >= allocated)
1432  reallocate(used + (len * 2));
1433  size_t start = used;
1434 
1435  // Convert UTF-8 to UTF-16.
1436  size_t pos = start;
1437  for (size_t l = 0; l<len;)
1438  {
1439  ++used;
1440  if (((c2[l] >> 6) & 0x03) == 0x02)
1441  { // Invalid continuation byte.
1443  ++l;
1444  }
1445  else if (c2[l] == 0xC0 || c2[l] == 0xC1)
1446  { // Invalid byte - overlong encoding.
1448  ++l;
1449  }
1450  else if ((c2[l] & 0xF8) == 0xF0)
1451  { // 4 bytes UTF-8, 2 bytes UTF-16.
1452  // Check for a full string.
1453  if ((l + 3) >= len)
1454  {
1456  l += 3;
1457  break;
1458  }
1459 
1460  // Validate.
1461  bool valid = true;
1462  u8 l2 = 0;
1463  if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1464  if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1465  if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1466  if (!valid)
1467  {
1469  l += l2;
1470  continue;
1471  }
1472 
1473  // Decode.
1474  uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
1475  uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
1476  uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
1477  uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
1478 
1479  // Split v up into a surrogate pair.
1480  uchar16_t x = static_cast<uchar16_t>(v);
1481  uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1482  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1483 
1484  array[pos++] = vh;
1485  array[pos++] = vl;
1486  l += 4;
1487  ++used; // Using two shorts this time, so increase used by 1.
1488  }
1489  else if ((c2[l] & 0xF0) == 0xE0)
1490  { // 3 bytes UTF-8, 1 byte UTF-16.
1491  // Check for a full string.
1492  if ((l + 2) >= len)
1493  {
1495  l += 2;
1496  break;
1497  }
1498 
1499  // Validate.
1500  bool valid = true;
1501  u8 l2 = 0;
1502  if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1503  if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
1504  if (!valid)
1505  {
1507  l += l2;
1508  continue;
1509  }
1510 
1511  // Decode.
1512  uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
1513  uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
1514  uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1515  array[pos++] = ch;
1516  l += 3;
1517  }
1518  else if ((c2[l] & 0xE0) == 0xC0)
1519  { // 2 bytes UTF-8, 1 byte UTF-16.
1520  // Check for a full string.
1521  if ((l + 1) >= len)
1522  {
1524  l += 1;
1525  break;
1526  }
1527 
1528  // Validate.
1529  if (((c2[l+1] >> 6) & 0x03) != 0x02)
1530  {
1532  ++l;
1533  continue;
1534  }
1535 
1536  // Decode.
1537  uchar8_t b1 = (c2[l] >> 2) & 0x7;
1538  uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
1539  uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
1540  array[pos++] = ch;
1541  l += 2;
1542  }
1543  else
1544  { // 1 byte UTF-8, 1 byte UTF-16.
1545  // Validate.
1546  if (c2[l] > 0x7F)
1547  { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
1549  }
1550  else array[pos++] = static_cast<uchar16_t>(c2[l]);
1551  ++l;
1552  }
1553  }
1554  array[used] = 0;
1555 
1556  // Validate our new UTF-16 string.
1557  validate();
1558 
1559  return *this;
1560  }
1561 
1562 
//! Appends a UTF-16 buffer to this string.
/** A leading byte order mark is detected and skipped, units are copied into
the internal array, and validate() is run on the result.
NOTE(review): several statements of this function are absent from this
listing (the declarations of c_end and m_end among them), so the comments
below describe only what is visible.
\param other Null-terminated source buffer; a null pointer is ignored.
\param length Upper bound on the number of units read from the buffer.
\return A reference to this string. */
1567  ustring16<TAlloc>& append(const uchar16_t* const other, size_t length=0xffffffff)
1568  {
1569  if (!other)
1570  return *this;
1571 
1572  // Determine if the string is long enough for a BOM.
1573  size_t len = 0;
1574  const uchar16_t* p = other;
1575  do
1576  {
1577  ++len;
1578  } while (*p++ && len < unicode::BOM_ENCODE_UTF16_LEN);
1579 
1580  // Check for the BOM to determine the string's endianness.
1583  c_end = unicode::EUTFEE_LITTLE;
1584  else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
1585  c_end = unicode::EUTFEE_BIG;
1586 
1587  // If a BOM was found, don't include it in the string.
1588  const uchar16_t* c2 = other;
1589  if (c_end != unicode::EUTFEE_NATIVE)
1590  {
1591  c2 = other + unicode::BOM_UTF16_LEN;
1593  }
1594 
1595  // Calculate the size of the string to read in.
1596  len = 0;
1597  p = c2;
1598  do
1599  {
1600  ++len;
1601  } while(*p++ && len < length);
1602  if (len > length)
1603  len = length;
1604 
1605  // If we need to grow the size of the array, do it now.
1606  if (used + len >= allocated)
1607  reallocate(used + (len * 2));
1608  size_t start = used;
1609  used += len;
1610 
1611  // Copy the string now.
1613  for (size_t l = start; l < start + len; ++l)
1614  {
       // NOTE(review): `l` starts at `start`, yet it also indexes the source c2.
       // When the string is not empty this reads c2 beyond its first `len`
       // units; it looks like c2[l - start] was intended - confirm.
1615  array[l] = (uchar16_t)c2[l];
1616  if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1618  }
1619 
1620  array[used] = 0;
1621 
1622  // Validate our new UTF-16 string.
1623  validate();
1624  return *this;
1625  }
1626 
1627 
1632  ustring16<TAlloc>& append(const uchar32_t* const other, size_t length=0xffffffff)
1633  {
1634  if (!other)
1635  return *this;
1636 
1637  // Check for the BOM to determine the string's endianness.
1640  c_end = unicode::EUTFEE_LITTLE;
1641  else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
1642  c_end = unicode::EUTFEE_BIG;
1643 
1644  // If a BOM was found, don't include it in the string.
1645  const uchar32_t* c2 = other;
1646  if (c_end != unicode::EUTFEE_NATIVE)
1647  {
1648  c2 = other + unicode::BOM_UTF32_LEN;
1650  }
1651 
1652  // Calculate the size of the string to read in.
1653  size_t len = 0;
1654  const uchar32_t* p = c2;
1655  do
1656  {
1657  ++len;
1658  } while(*p++ && len < length);
1659  if (len > length)
1660  len = length;
1661 
1662  // If we need to grow the size of the array, do it now.
1663  // In case all of the UTF-32 string is split into surrogate pairs, do len * 2.
1664  if (used + (len * 2) >= allocated)
1665  reallocate(used + ((len * 2) * 2));
1666  size_t start = used;
1667 
1668  // Convert UTF-32 to UTF-16.
1670  size_t pos = start;
1671  for (size_t l = 0; l<len; ++l)
1672  {
1673  ++used;
1674 
1675  uchar32_t ch = c2[l];
1676  if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
1677  ch = unicode::swapEndian32(ch);
1678 
1679  if (ch > 0xFFFF)
1680  {
1681  // Split ch up into a surrogate pair as it is over 16 bits long.
1682  uchar16_t x = static_cast<uchar16_t>(ch);
1683  uchar16_t vh = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
1684  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
1685  array[pos++] = vh;
1686  array[pos++] = vl;
1687  ++used; // Using two shorts, so increased used again.
1688  }
1689  else if (ch >= 0xD800 && ch <= 0xDFFF)
1690  {
1691  // Between possible UTF-16 surrogates (invalid!)
1693  }
1694  else array[pos++] = static_cast<uchar16_t>(ch);
1695  }
1696  array[used] = 0;
1697 
1698  // Validate our new UTF-16 string.
1699  validate();
1700 
1701  return *this;
1702  }
1703 
1704 
1709  {
1710  const uchar16_t* oa = other.c_str();
1711 
1712  size_t len = other.size_raw();
1713 
1714  if (used + len >= allocated)
1715  reallocate(used + len);
1716 
1717  for (size_t l=0; l<len; ++l)
1718  array[used+l] = oa[l];
1719 
1720  used += len;
1721  array[used] = 0;
1722 
1723  return *this;
1724  }
1725 
1726 
1732  {
1733  if (other.size() == 0)
1734  return *this;
1735 
1736  if (other.size() < length)
1737  {
1738  append(other);
1739  return *this;
1740  }
1741 
1742  if (used + length * 2 >= allocated)
1743  reallocate(used + length * 2);
1744 
1745  const_iterator iter(other, 0);
1746  size_t l = length;
1747  while (!iter.atEnd() && l)
1748  {
1749  uchar32_t c = *iter;
1750  append(c);
1751  ++iter;
1752  --l;
1753  }
1754 
1755  return *this;
1756  }
1757 
1758 
1761  void reserve(size_t count)
1762  {
1763  if (count < allocated)
1764  return;
1765 
1766  reallocate(count);
1767  }
1768 
1769 
1773  size_t findFirst(uchar32_t c) const
1774  {
1775  const_iterator i(*this, 0);
1776 
1777  size_t pos = 0;
1778  while (!i.atEnd())
1779  {
1780  uchar32_t t = *i;
1781  if (c == t)
1782  return pos;
1783  ++pos;
1784  ++i;
1785  }
1786 
1787  return ustring16<TAlloc>::npos;
1788  }
1789 
1794  size_t findFirstChar(const uchar32_t* const c, size_t count=1) const
1795  {
1796  if (!c || !count)
1797  return ustring16<TAlloc>::npos;
1798 
1799  const_iterator i(*this, 0);
1800 
1801  size_t pos = 0;
1802  while (!i.atEnd())
1803  {
1804  uchar32_t t = *i;
1805  for (size_t j = 0; j < count; ++j)
1806  if (t == c[j])
1807  return pos;
1808  ++pos;
1809  ++i;
1810  }
1811 
1812  return ustring16<TAlloc>::npos;
1813  }
1814 
1815 
1820  size_t findFirstCharNotInList(const uchar32_t* const c, size_t count=1) const
1821  {
1822  if (!c || !count)
1823  return ustring16<TAlloc>::npos;
1824 
1825  const_iterator i(*this, 0);
1826 
1827  size_t pos = 0;
1828  while (!i.atEnd())
1829  {
1830  uchar32_t t = *i;
1831  size_t j;
1832  for (j = 0; j < count; ++j)
1833  if (t == c[j])
1834  break;
1835 
1836  if (j == count)
1837  return pos;
1838  ++pos;
1839  ++i;
1840  }
1841 
1842  return ustring16<TAlloc>::npos;
1843  }
1844 
1849  size_t findLastCharNotInList(const uchar32_t* const c, size_t count=1) const
1850  {
1851  if (!c || !count)
1852  return ustring16<TAlloc>::npos;
1853 
1854  const_iterator i(end());
1855  --i;
1856 
1857  size_t pos = size();
1858  if (pos == 0)
1859  return ustring16<TAlloc>::npos;
1860  --pos;
1861 
1862  while (!i.atStart())
1863  {
1864  uchar32_t t = *i;
1865  size_t j;
1866  for (j = 0; j < count; ++j)
1867  if (t == c[j])
1868  break;
1869 
1870  if (j == count)
1871  return pos;
1872  --pos;
1873  --i;
1874  }
1875 
1876  return ustring16<TAlloc>::npos;
1877  }
1878 
1883  size_t findNext(uchar32_t c, size_t startPos) const
1884  {
1885  const_iterator i(*this, startPos);
1886 
1887  size_t pos = startPos;
1888  while (!i.atEnd())
1889  {
1890  uchar32_t t = *i;
1891  if (t == c)
1892  return pos;
1893  ++pos;
1894  ++i;
1895  }
1896 
1897  return ustring16<TAlloc>::npos;
1898  }
1899 
1900 
1906  {
1907  size_t s = size();
1909  start = s - 1;
1910 
1911  const_iterator i(*this, start);
1912  size_t pos = start;
1913  while (!i.atStart())
1914  {
1915  uchar32_t t = *i;
1916  if (t == c)
1917  return pos;
1918  --pos;
1919  --i;
1920  }
1921 
1922  return ustring16<TAlloc>::npos;
1923  }
1924 
1929  size_t findLastChar(const uchar32_t* const c, size_t count=1) const
1930  {
1931  if (!c || !count)
1932  return ustring16<TAlloc>::npos;
1933 
1934  const_iterator i(end());
1935  --i;
1936 
1937  size_t pos = size();
1938  while (!i.atStart())
1939  {
1940  uchar32_t t = *i;
1941  for (size_t j = 0; j < count; ++j)
1942  if (t == c[j])
1943  return pos;
1944  --pos;
1945  --i;
1946  }
1947 
1948  return ustring16<TAlloc>::npos;
1949  }
1950 
1951 
1956  size_t find(const ustring16<TAlloc>& str, const size_t start = 0) const
1957  {
1958  size_t my_size = size();
1959  size_t their_size = str.size();
1960 
1961  if (their_size == 0 || my_size - start < their_size)
1962  return ustring16<TAlloc>::npos;
1963 
1964  const_iterator i(*this, start);
1965 
1966  size_t pos = start;
1967  while (!i.atEnd())
1968  {
1969  const_iterator i2(i);
1970  const_iterator j(str, 0);
1971  uchar32_t t1 = (uchar32_t)*i2;
1972  uchar32_t t2 = (uchar32_t)*j;
1973  while (t1 == t2)
1974  {
1975  ++i2;
1976  ++j;
1977  if (j.atEnd())
1978  return pos;
1979  t1 = (uchar32_t)*i2;
1980  t2 = (uchar32_t)*j;
1981  }
1982  ++i;
1983  ++pos;
1984  }
1985 
1986  return ustring16<TAlloc>::npos;
1987  }
1988 
1989 
1994  size_t find_raw(const ustring16<TAlloc>& str, const size_t start = 0) const
1995  {
1996  const uchar16_t* data = str.c_str();
1997  if (data && *data)
1998  {
1999  size_t len = 0;
2000 
2001  while (data[len])
2002  ++len;
2003 
2004  if (len > used)
2005  return ustring16<TAlloc>::npos;
2006 
2007  for (size_t i = start; i <= used - len; ++i)
2008  {
2009  size_t j = 0;
2010 
2011  while(data[j] && array[i+j] == data[j])
2012  ++j;
2013 
2014  if (!data[j])
2015  return i;
2016  }
2017  }
2018 
2019  return ustring16<TAlloc>::npos;
2020  }
2021 
2022 
2027  ustring16<TAlloc> subString(size_t begin, size_t length) const
2028  {
2029  size_t len = size();
2030 
2031  // Check our inputs to see if we can exit early.
2032  if ((length == 0) || (begin>=len))
2033  return ustring16<TAlloc>("");
2034 
2035  // Clamp to maximum value.
2037  length = len - begin;
2038 
2039  // TODO: Check for near overflow values.
2040  //if (begin > ustring16<TAlloc>::npos - length)
2041 
2042  // Clamp to the string length.
2043  if ((length + begin) > len)
2044  length = len - begin;
2045 
2047  o.reserve((length+1) * 2);
2048 
2049  const_iterator i(*this, begin);
2050  while (!i.atEnd() && length)
2051  {
2052  o.append(*i);
2053  ++i;
2054  --length;
2055  }
2056 
2057  return o;
2058  }
2059 
2060 
2065  {
2066  append((uchar32_t)c);
2067  return *this;
2068  }
2069 
2070 
2075  {
2076  append(c);
2077  return *this;
2078  }
2079 
2080 
2085  {
2087  return *this;
2088  }
2089 
2090 
2095  {
2097  return *this;
2098  }
2099 
2100 
2101 #ifdef USTRING_CPP0X_NEWLITERALS
2106  {
2108  return *this;
2109  }
2110 
2111 
2115  ustring16<TAlloc>& operator += (unsigned int c)
2116  {
2118  return *this;
2119  }
2120 #endif
2121 
2122 
2127  {
2129  return *this;
2130  }
2131 
2132 
2137  {
2139  return *this;
2140  }
2141 
2142 
2147  {
2149  return *this;
2150  }
2151 
2152 
2157  {
2158  append(c);
2159  return *this;
2160  }
2161 
2162 
2167  {
2168  append(other);
2169  return *this;
2170  }
2171 
2172 
2178  {
2179  iterator i(*this, 0);
2180  while (!i.atEnd())
2181  {
2182  typename ustring16<TAlloc>::access a = *i;
2183  if ((uchar32_t)a == toReplace)
2184  a = replaceWith;
2185  ++i;
2186  }
2187  return *this;
2188  }
2189 
2190 
2195  ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
2196  {
2197  if (toReplace.size() == 0)
2198  return *this;
2199 
2200  const uchar16_t* other = toReplace.c_str();
2201  const uchar16_t* replace = replaceWith.c_str();
2202  const size_t other_size = toReplace.size_raw();
2203  const size_t replace_size = replaceWith.size_raw();
2204 
2205  // A character for character replace. The string will not shrink or grow.
2206  if (replace_size == other_size)
2207  {
2208  size_t pos = 0;
2209  while ((pos = find_raw(other, pos)) != ustring16<TAlloc>::npos)
2210  {
2211  for (size_t i = 0; i < replace_size; ++i)
2212  array[pos + i] = replace[i];
2213  ++pos;
2214  }
2215  return *this;
2216  }
2217 
2218  // We are going to be removing some characters. The string will shrink.
2219  if (replace_size < other_size)
2220  {
2221  size_t i = 0;
2222  for (size_t pos = 0; pos <= used; ++i, ++pos)
2223  {
2224  // Is this potentially a match?
2225  if (array[pos] == *other)
2226  {
2227  // Check to see if we have a match.
2228  size_t j;
2229  for (j = 0; j < other_size; ++j)
2230  {
2231  if (array[pos + j] != other[j])
2232  break;
2233  }
2234 
2235  // If we have a match, replace characters.
2236  if (j == other_size)
2237  {
2238  for (j = 0; j < replace_size; ++j)
2239  array[i + j] = replace[j];
2240  i += replace_size - 1;
2241  pos += other_size - 1;
2242  continue;
2243  }
2244  }
2245 
2246  // No match found, just copy characters.
2247  array[i - 1] = array[pos];
2248  }
2249  array[i] = 0;
2250  used = i;
2251 
2252  return *this;
2253  }
2254 
2255  // Determine the delta.
2256  size_t delta = replace_size - other_size;
2257 
2258  // We are going to be adding characters, so the string size will increase.
2259  // Count the number of times toReplace exists in the string so we can allocate the new size.
2260  size_t find_count = 0;
2261  size_t pos = 0;
2262  while ((pos = find_raw(other, pos)) != ustring16<TAlloc>::npos)
2263  {
2264  ++find_count;
2265  ++pos;
2266  }
2267 
2268  // Re-allocate the string now, if needed.
2269  size_t len = delta * find_count;
2270  if (used + len >= allocated)
2271  reallocate(used + len);
2272 
2273  // Start replacing.
2274  pos = 0;
2275  while ((pos = find_raw(other, pos)) != ustring16<TAlloc>::npos)
2276  {
2277  uchar16_t* start = array + pos + other_size - 1;
2278  uchar16_t* ptr = array + used;
2279  uchar16_t* end = array + used + delta;
2280 
2281  // Shift characters to make room for the string.
2282  while (ptr != start)
2283  {
2284  *end = *ptr;
2285  --ptr;
2286  --end;
2287  }
2288 
2289  // Add the new string now.
2290  for (size_t i = 0; i < replace_size; ++i)
2291  array[pos + i] = replace[i];
2292 
2293  pos += replace_size;
2294  used += delta;
2295  }
2296 
2297  // Terminate the string and return ourself.
2298  array[used] = 0;
2299  return *this;
2300  }
2301 
2302 
2307  {
2308  size_t pos = 0;
2309  size_t found = 0;
2310  size_t len = (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2311  for (size_t i=0; i<=used; ++i)
2312  {
2313  uchar32_t uc32 = 0;
2314  if (!UTF16_IS_SURROGATE_HI(array[i]))
2315  uc32 |= array[i];
2316  else if (i + 1 <= used)
2317  {
2318  // Convert the surrogate pair into a single UTF-32 character.
2319  uc32 = unicode::toUTF32(array[i], array[i + 1]);
2320  }
2321  size_t len2 = (uc32 > 0xFFFF ? 2 : 1);
2322 
2323  if (uc32 == c)
2324  {
2325  found += len;
2326  continue;
2327  }
2328 
2329  array[pos++] = array[i];
2330  if (len2 == 2)
2331  array[pos++] = array[++i];
2332  }
2333  used -= found;
2334  array[used] = 0;
2335  return *this;
2336  }
2337 
2338 
2343  {
2344  size_t size = toRemove.size_raw();
2345  if (size == 0) return *this;
2346 
2347  const uchar16_t* tra = toRemove.c_str();
2348  size_t pos = 0;
2349  size_t found = 0;
2350  for (size_t i=0; i<=used; ++i)
2351  {
2352  size_t j = 0;
2353  while (j < size)
2354  {
2355  if (array[i + j] != tra[j])
2356  break;
2357  ++j;
2358  }
2359  if (j == size)
2360  {
2361  found += size;
2362  i += size - 1;
2363  continue;
2364  }
2365 
2366  array[pos++] = array[i];
2367  }
2368  used -= found;
2369  array[used] = 0;
2370  return *this;
2371  }
2372 
2373 
2378  {
2379  if (characters.size_raw() == 0)
2380  return *this;
2381 
2382  size_t pos = 0;
2383  size_t found = 0;
2384  const_iterator iter(characters);
2385  for (size_t i=0; i<=used; ++i)
2386  {
2387  uchar32_t uc32 = 0;
2388  if (!UTF16_IS_SURROGATE_HI(array[i]))
2389  uc32 |= array[i];
2390  else if (i + 1 <= used)
2391  {
2392  // Convert the surrogate pair into a single UTF-32 character.
2393  uc32 = unicode::toUTF32(array[i], array[i+1]);
2394  }
2395  size_t len2 = (uc32 > 0xFFFF ? 2 : 1);
2396 
2397  bool cont = false;
2398  iter.toStart();
2399  while (!iter.atEnd())
2400  {
2401  uchar32_t c = *iter;
2402  if (uc32 == c)
2403  {
2404  found += (c > 0xFFFF ? 2 : 1); // Remove characters equal to the size of c as a UTF-16 character.
2405  ++i;
2406  cont = true;
2407  break;
2408  }
2409  ++iter;
2410  }
2411  if (cont) continue;
2412 
2413  array[pos++] = array[i];
2414  if (len2 == 2)
2415  array[pos++] = array[++i];
2416  }
2417  used -= found;
2418  array[used] = 0;
2419  return *this;
2420  }
2421 
2422 
2427  ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
2428  {
2429  core::array<uchar32_t> utf32white = whitespace.toUTF32();
2430 
2431  // find start and end of the substring without the specified characters
2432  const size_t begin = findFirstCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2434  return (*this="");
2435 
2436  const size_t end = findLastCharNotInList(utf32white.const_pointer(), whitespace.used + 1);
2438  return (*this = subString(begin, ustring16<TAlloc>::npos));
2439  else return (*this = subString(begin, (end + 1) - begin));
2440  }
2441 
2442 
2448  {
2449  _IRR_DEBUG_BREAK_IF(index>used) // access violation
2450 
2451  iterator i(*this, index);
2452 
2453  uchar32_t t = *i;
2454  size_t len = (t > 0xFFFF ? 2 : 1);
2455 
2456  for (size_t j = i.getPos() + len; j <= used; ++j)
2457  array[j - len] = array[j];
2458 
2459  used -= len;
2460  array[used] = 0;
2461 
2462  return *this;
2463  }
2464 
2465 
2469  {
2470  // Validate all unicode characters.
2471  for (size_t i=0; i<allocated; ++i)
2472  {
2473  // Terminate on existing null.
2474  if (array[i] == 0)
2475  {
2476  used = i;
2477  return *this;
2478  }
2479  if (UTF16_IS_SURROGATE(array[i]))
2480  {
2481  if (((i+1) >= allocated) || UTF16_IS_SURROGATE_LO(array[i]))
2483  else if (UTF16_IS_SURROGATE_HI(array[i]) && !UTF16_IS_SURROGATE_LO(array[i+1]))
2485  ++i;
2486  }
2487  if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
2489  }
2490 
2491  // terminate
2492  used = 0;
2493  if (allocated > 0)
2494  {
2495  used = allocated - 1;
2496  array[used] = 0;
2497  }
2498  return *this;
2499  }
2500 
2501 
2505  {
2506  if (used < 1)
2507  return 0;
2508 
2509  if (UTF16_IS_SURROGATE_LO(array[used-1]))
2510  {
2511  // Make sure we have a paired surrogate.
2512  if (used < 2)
2513  return 0;
2514 
2515  // Check for an invalid surrogate.
2516  if (!UTF16_IS_SURROGATE_HI(array[used-2]))
2517  return 0;
2518 
2519  // Convert the surrogate pair into a single UTF-32 character.
2520  return unicode::toUTF32(array[used-2], array[used-1]);
2521  }
2522  else
2523  {
2524  return array[used-1];
2525  }
2526  }
2527 
2528 
2530 
2547  template<class container>
2548  size_t split(container& ret, const uchar32_t* const c, size_t count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2549  {
2550  if (!c)
2551  return 0;
2552 
2553  const_iterator i(*this);
2554  const size_t oldSize=ret.size();
2555  size_t pos = 0;
2556  size_t lastpos = 0;
2557  size_t lastpospos = 0;
2558  bool lastWasSeparator = false;
2559  while (!i.atEnd())
2560  {
2561  uchar32_t ch = *i;
2562  bool foundSeparator = false;
2563  for (size_t j=0; j<count; ++j)
2564  {
2565  if (ch == c[j])
2566  {
2567  if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
2568  !lastWasSeparator)
2569  ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
2570  foundSeparator = true;
2571  lastpos = (keepSeparators ? pos : pos + 1);
2572  lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
2573  break;
2574  }
2575  }
2576  lastWasSeparator = foundSeparator;
2577  ++pos;
2578  ++i;
2579  }
2580  size_t s = size() + 1;
2581  if (s > lastpos)
2582  ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos));
2583  return ret.size()-oldSize;
2584  }
2585 
2586 
2588 
2604  template<class container>
2605  size_t split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
2606  {
2607  core::array<uchar32_t> v = c.toUTF32();
2608  return split(ret, v.pointer(), v.size(), ignoreEmptyTokens, keepSeparators);
2609  }
2610 
2611 
2614  size_t capacity() const
2615  {
2616  return allocated;
2617  }
2618 
2619 
2622  size_t size_raw() const
2623  {
2624  return used;
2625  }
2626 
2627 
2633  {
2634  u8 len = (c > 0xFFFF ? 2 : 1);
2635 
2636  if (used + len >= allocated)
2637  reallocate(used + len);
2638 
2639  used += len;
2640 
2641  iterator iter(*this, pos);
2642  for (size_t i = used - 2; i > iter.getPos(); --i)
2643  array[i] = array[i - len];
2644 
2645  if (c > 0xFFFF)
2646  {
2647  // c will be multibyte, so split it up into a surrogate pair.
2648  uchar16_t x = static_cast<uchar16_t>(c);
2649  uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
2650  uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
2651  array[iter.getPos()] = vh;
2652  array[iter.getPos()+1] = vl;
2653  }
2654  else
2655  {
2656  array[iter.getPos()] = static_cast<uchar16_t>(c);
2657  }
2658  array[used] = 0;
2659  return *this;
2660  }
2661 
2662 
2668  {
2669  size_t len = c.size_raw();
2670  if (len == 0) return *this;
2671 
2672  if (used + len >= allocated)
2673  reallocate(used + len);
2674 
2675  used += len;
2676 
2677  iterator iter(*this, pos);
2678  for (size_t i = used - 2; i > iter.getPos() + len; --i)
2679  array[i] = array[i - len];
2680 
2681  const uchar16_t* s = c.c_str();
2682  for (size_t i = 0; i < len; ++i)
2683  {
2684  array[pos++] = *s;
2685  ++s;
2686  }
2687 
2688  array[used] = 0;
2689  return *this;
2690  }
2691 
2692 
2698  {
2699  if (used + 1 >= allocated)
2700  reallocate(used + 1);
2701 
2702  ++used;
2703 
2704  for (size_t i = used - 1; i > pos; --i)
2705  array[i] = array[i - 1];
2706 
2707  array[pos] = c;
2708  array[used] = 0;
2709  return *this;
2710  }
2711 
2712 
2717  {
2718  for (size_t i=pos; i<=used; ++i)
2719  {
2720  array[i] = array[i + 1];
2721  }
2722  --used;
2723  array[used] = 0;
2724  return *this;
2725  }
2726 
2727 
2733  {
2734  array[pos] = c;
2735  return *this;
2736  }
2737 
2738 
2742  {
2743  iterator i(*this, 0);
2744  return i;
2745  }
2746 
2747 
2751  {
2752  const_iterator i(*this, 0);
2753  return i;
2754  }
2755 
2756 
2760  {
2761  const_iterator i(*this, 0);
2762  return i;
2763  }
2764 
2765 
2769  {
2770  iterator i(*this, 0);
2771  i.toEnd();
2772  return i;
2773  }
2774 
2775 
2779  {
2780  const_iterator i(*this, 0);
2781  i.toEnd();
2782  return i;
2783  }
2784 
2785 
2789  {
2790  const_iterator i(*this, 0);
2791  i.toEnd();
2792  return i;
2793  }
2794 
2795 
2799  core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
2800  {
2802  ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2803  const_iterator iter(*this, 0);
2804 
2805  // Add the byte order mark if the user wants it.
2806  if (addBOM)
2807  {
2811  }
2812 
2813  while (!iter.atEnd())
2814  {
2815  uchar32_t c = *iter;
2816  if (c > 0xFFFF)
2817  { // 4 bytes
2818  uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2819  uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2820  uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2821  uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2822  ret.append(b1);
2823  ret.append(b2);
2824  ret.append(b3);
2825  ret.append(b4);
2826  }
2827  else if (c > 0x7FF)
2828  { // 3 bytes
2829  uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2830  uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2831  uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2832  ret.append(b1);
2833  ret.append(b2);
2834  ret.append(b3);
2835  }
2836  else if (c > 0x7F)
2837  { // 2 bytes
2838  uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2839  uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2840  ret.append(b1);
2841  ret.append(b2);
2842  }
2843  else
2844  { // 1 byte
2845  ret.append(static_cast<uchar8_t>(c));
2846  }
2847  ++iter;
2848  }
2849  return ret;
2850  }
2851 
2852 
2856  core::array<uchar8_t> toUTF8(const bool addBOM = false) const
2857  {
2858  core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
2859  const_iterator iter(*this, 0);
2860 
2861  // Add the byte order mark if the user wants it.
2862  if (addBOM)
2863  {
2867  }
2868 
2869  while (!iter.atEnd())
2870  {
2871  uchar32_t c = *iter;
2872  if (c > 0xFFFF)
2873  { // 4 bytes
2874  uchar8_t b1 = (0x1E << 3) | ((c >> 18) & 0x7);
2875  uchar8_t b2 = (0x2 << 6) | ((c >> 12) & 0x3F);
2876  uchar8_t b3 = (0x2 << 6) | ((c >> 6) & 0x3F);
2877  uchar8_t b4 = (0x2 << 6) | (c & 0x3F);
2878  ret.push_back(b1);
2879  ret.push_back(b2);
2880  ret.push_back(b3);
2881  ret.push_back(b4);
2882  }
2883  else if (c > 0x7FF)
2884  { // 3 bytes
2885  uchar8_t b1 = (0xE << 4) | ((c >> 12) & 0xF);
2886  uchar8_t b2 = (0x2 << 6) | ((c >> 6) & 0x3F);
2887  uchar8_t b3 = (0x2 << 6) | (c & 0x3F);
2888  ret.push_back(b1);
2889  ret.push_back(b2);
2890  ret.push_back(b3);
2891  }
2892  else if (c > 0x7F)
2893  { // 2 bytes
2894  uchar8_t b1 = (0x6 << 5) | ((c >> 6) & 0x1F);
2895  uchar8_t b2 = (0x2 << 6) | (c & 0x3F);
2896  ret.push_back(b1);
2897  ret.push_back(b2);
2898  }
2899  else
2900  { // 1 byte
2901  ret.push_back(static_cast<uchar8_t>(c));
2902  }
2903  ++iter;
2904  }
2905  ret.push_back(0);
2906  return ret;
2907  }
2908 
2909 
2910 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2911  core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2916  {
2918  ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2919 
2920  // Add the BOM if specified.
2921  if (addBOM)
2922  {
2923  if (endian == unicode::EUTFEE_NATIVE)
2924  ret[0] = unicode::BOM;
2925  else if (endian == unicode::EUTFEE_LITTLE)
2926  {
2927  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2928  *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2929  *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2930  }
2931  else
2932  {
2933  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ret.c_str());
2934  *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2935  *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2936  }
2937  }
2938 
2939  ret.append(array);
2940  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2941  {
2942  char16_t* ptr = ret.c_str();
2943  for (size_t i = 0; i < ret.size(); ++i)
2944  *ptr++ = unicode::swapEndian16(*ptr);
2945  }
2946  return ret;
2947  }
2948 #endif
2949 
2950 
2956  core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2957  {
2958  core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
2959  uchar16_t* ptr = ret.pointer();
2960 
2961  // Add the BOM if specified.
2962  if (addBOM)
2963  {
2964  if (endian == unicode::EUTFEE_NATIVE)
2965  *ptr = unicode::BOM;
2966  else if (endian == unicode::EUTFEE_LITTLE)
2967  {
2968  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2969  *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
2970  *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
2971  }
2972  else
2973  {
2974  uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
2975  *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
2976  *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
2977  }
2978  ++ptr;
2979  }
2980 
2981  memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
2982  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
2983  {
2984  for (size_t i = 0; i <= used; ++i)
2985  *ptr++ = unicode::swapEndian16(*ptr);
2986  }
2987  ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
2988  ret.push_back(0);
2989  return ret;
2990  }
2991 
2992 
2993 #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
2994  core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
2999  {
3001  ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
3002  const_iterator iter(*this, 0);
3003 
3004  // Add the BOM if specified.
3005  if (addBOM)
3006  {
3007  if (endian == unicode::EUTFEE_NATIVE)
3008  ret.append(unicode::BOM);
3009  else
3010  {
3011  union
3012  {
3013  uchar32_t full;
3014  u8 chunk[4];
3015  } t;
3016 
3017  if (endian == unicode::EUTFEE_LITTLE)
3018  {
3019  t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3020  t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3021  t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3022  t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3023  }
3024  else
3025  {
3026  t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3027  t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3028  t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3029  t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3030  }
3031  ret.append(t.full);
3032  }
3033  }
3034 
3035  while (!iter.atEnd())
3036  {
3037  uchar32_t c = *iter;
3038  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3040  ret.append(c);
3041  ++iter;
3042  }
3043  return ret;
3044  }
3045 #endif
3046 
3047 
3053  core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3054  {
3055  core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
3056  const_iterator iter(*this, 0);
3057 
3058  // Add the BOM if specified.
3059  if (addBOM)
3060  {
3061  if (endian == unicode::EUTFEE_NATIVE)
3062  ret.push_back(unicode::BOM);
3063  else
3064  {
3065  union
3066  {
3067  uchar32_t full;
3068  u8 chunk[4];
3069  } t;
3070 
3071  if (endian == unicode::EUTFEE_LITTLE)
3072  {
3073  t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
3074  t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
3075  t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
3076  t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
3077  }
3078  else
3079  {
3080  t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
3081  t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
3082  t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
3083  t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
3084  }
3085  ret.push_back(t.full);
3086  }
3087  }
3088  ret.push_back(0);
3089 
3090  while (!iter.atEnd())
3091  {
3092  uchar32_t c = *iter;
3093  if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
3095  ret.push_back(c);
3096  ++iter;
3097  }
3098  return ret;
3099  }
3100 
3101 
3103 
3105  core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3109  {
3110  if (sizeof(wchar_t) == 4)
3111  {
3112  core::array<uchar32_t> a(toUTF32(endian, addBOM));
3113  core::stringw ret(a.pointer());
3114  return ret;
3115  }
3116  else if (sizeof(wchar_t) == 2)
3117  {
3118  if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3119  {
3120  core::stringw ret(array);
3121  return ret;
3122  }
3123  else
3124  {
3125  core::array<uchar16_t> a(toUTF16(endian, addBOM));
3126  core::stringw ret(a.pointer());
3127  return ret;
3128  }
3129  }
3130  else if (sizeof(wchar_t) == 1)
3131  {
3132  core::array<uchar8_t> a(toUTF8(addBOM));
3133  core::stringw ret(a.pointer());
3134  return ret;
3135  }
3136 
3137  // Shouldn't happen.
3138  return core::stringw();
3139  }
3140 
3141 
3143 
3145  core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3149  {
3150  if (sizeof(wchar_t) == 4)
3151  {
3152  core::array<uchar32_t> a(toUTF32(endian, addBOM));
3153  core::array<wchar_t> ret(a.size());
3154  ret.set_used(a.size());
3155  memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
3156  return ret;
3157  }
3158  if (sizeof(wchar_t) == 2)
3159  {
3160  if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
3161  {
3162  core::array<wchar_t> ret(used);
3163  ret.set_used(used);
3164  memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
3165  return ret;
3166  }
3167  else
3168  {
3169  core::array<uchar16_t> a(toUTF16(endian, addBOM));
3170  core::array<wchar_t> ret(a.size());
3171  ret.set_used(a.size());
3172  memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
3173  return ret;
3174  }
3175  }
3176  if (sizeof(wchar_t) == 1)
3177  {
3178  core::array<uchar8_t> a(toUTF8(addBOM));
3179  core::array<wchar_t> ret(a.size());
3180  ret.set_used(a.size());
3181  memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
3182  return ret;
3183  }
3184 
3185  // Shouldn't happen.
3186  return core::array<wchar_t>();
3187  }
3188 
3193  io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
3194  {
3195 #if defined(_IRR_WCHAR_FILESYSTEM)
3196  return toWCHAR_s(endian, addBOM);
3197 #else
3198  return toUTF8_s(addBOM);
3199 #endif
3200  }
3201 
3207  ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
3208  {
3209  // Clear our string.
3210  *this = "";
3211  if (!data)
3212  return *this;
3213 
3215  switch (e)
3216  {
3217  default:
3218  case unicode::EUTFE_UTF8:
3219  append((uchar8_t*)data, data_size);
3220  break;
3221 
3222  case unicode::EUTFE_UTF16:
3225  append((uchar16_t*)data, data_size / 2);
3226  break;
3227 
3228  case unicode::EUTFE_UTF32:
3231  append((uchar32_t*)data, data_size / 4);
3232  break;
3233  }
3234 
3235  return *this;
3236  }
3237 
3241  {
3242  return encoding;
3243  }
3244 
3248  {
3249  if (encoding == unicode::EUTFE_UTF16_LE ||
3250  encoding == unicode::EUTFE_UTF32_LE)
3251  return unicode::EUTFEE_LITTLE;
3252  else return unicode::EUTFEE_BIG;
3253  }
3254 
3255 private:
3256 
3259  void reallocate(size_t new_size)
3260  {
3261  uchar16_t* old_array = array;
3262 
3263  array = allocator.allocate(new_size + 1); //new u16[new_size];
3264  allocated = new_size + 1;
3265  if (old_array == 0) return;
3266 
3267  size_t amount = used < new_size ? used : new_size;
3268  for (size_t i=0; i<=amount; ++i)
3269  array[i] = old_array[i];
3270 
3271  if (allocated <= used)
3272  used = allocated - 1;
3273 
3274  array[used] = 0;
3275 
3276  allocator.deallocate(old_array); // delete [] old_array;
3277  }
3278 
3279  //--- member variables
3280 
3281  uchar16_t* array;
3282  unicode::EUTF_ENCODE encoding;
3283  size_t allocated;
3284  size_t used;
3285  TAlloc allocator;
3286  //irrAllocator<uchar16_t> allocator;
3287 };
3288 
3290 
3291 
3293 template <typename TAlloc>
3295 {
3296  ustring16<TAlloc> ret(left);
3297  ret += right;
3298  return ret;
3299 }
3300 
3301 
3303 template <typename TAlloc, class B>
3305 {
3306  ustring16<TAlloc> ret(left);
3307  ret += right;
3308  return ret;
3309 }
3310 
3311 
3313 template <class B, typename TAlloc>
3315 {
3316  ustring16<TAlloc> ret(left);
3317  ret += right;
3318  return ret;
3319 }
3320 
3321 
3323 template <typename TAlloc, typename B, typename BAlloc>
3325 {
3326  ustring16<TAlloc> ret(left);
3327  ret += right;
3328  return ret;
3329 }
3330 
3331 
3333 template <typename TAlloc, typename B, typename BAlloc>
3335 {
3336  ustring16<TAlloc> ret(left);
3337  ret += right;
3338  return ret;
3339 }
3340 
3341 
3342 #ifndef USTRING_NO_STL
3343 template <typename TAlloc, typename B, typename A, typename BAlloc>
3345 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
3346 {
3347  ustring16<TAlloc> ret(left);
3348  ret += right;
3349  return ret;
3350 }
3351 
3353 template <typename TAlloc, typename B, typename A, typename BAlloc>
3354 inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
3355 {
3356  ustring16<TAlloc> ret(left);
3357  ret += right;
3358  return ret;
3359 }
3360 #endif
3361 
3363 template <typename TAlloc>
3365 {
3366  ustring16<TAlloc> ret(left);
3367  ret += right;
3368  return ret;
3369 }
3370 
3371 
3373 template <typename TAlloc>
3375 {
3376  ustring16<TAlloc> ret(left);
3377  ret += right;
3378  return ret;
3379 }
3380 
3381 
3382 #ifdef USTRING_CPP0X_NEWLITERALS
3383 template <typename TAlloc>
3385 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
3386 {
3387  ustring16<TAlloc> ret(left);
3388  ret += right;
3389  return ret;
3390 }
3391 
3392 
3394 template <typename TAlloc>
3395 inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
3396 {
3397  ustring16<TAlloc> ret(left);
3398  ret += right;
3399  return ret;
3400 }
3401 #endif
3402 
3403 
// NOTE(review): original line 3406 (the function declaration) is absent from this listing.
// Presumably the (const ustring16<TAlloc>& left, const short right) overload, by its position
// directly before the `unsigned short` pair - confirm upstream.
// Visible body: copies `left` into `ret`, converts `right` through core::stringc, appends that
// via operator+=, and returns `ret` by value.
3405 template <typename TAlloc>
3407 {
3408  ustring16<TAlloc> ret(left);
3409  ret += core::stringc(right);
3410  return ret;
3411 }
3412 
3413 
// NOTE(review): original lines 3416 (the function declaration) and 3418 (the statement that
// declares `ret`) are absent from this listing, so the block is not compilable as shown.
// Presumably the (const short left, const ustring16<TAlloc>& right) overload with `ret` built
// from core::stringc(left), like the rvalue `short` overload further down - confirm upstream.
// Visible body: appends `right` to `ret` via operator+= and returns `ret` by value.
3415 template <typename TAlloc>
3417 {
3419  ret += right;
3420  return ret;
3421 }
3422 
3423 
3425 template <typename TAlloc>
3426 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
3427 {
3428  ustring16<TAlloc> ret(left);
3429  ret += core::stringc(right);
3430  return ret;
3431 }
3432 
3433 
3435 template <typename TAlloc>
3436 inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
3437 {
3439  ret += right;
3440  return ret;
3441 }
3442 
3443 
// NOTE(review): original line 3446 (the function declaration) is absent from this listing.
// Presumably the (const ustring16<TAlloc>& left, const int right) overload, by its position
// directly before the `unsigned int` pair - confirm upstream.
// Visible body: copies `left` into `ret`, converts `right` through core::stringc, appends that
// via operator+=, and returns `ret` by value.
3445 template <typename TAlloc>
3447 {
3448  ustring16<TAlloc> ret(left);
3449  ret += core::stringc(right);
3450  return ret;
3451 }
3452 
3453 
// NOTE(review): original lines 3456 (the function declaration) and 3458 (the statement that
// declares `ret`) are absent from this listing, so the block is not compilable as shown.
// Presumably the (const int left, const ustring16<TAlloc>& right) overload with `ret` built
// from core::stringc(left), like the rvalue `int` overload further down - confirm upstream.
// Visible body: appends `right` to `ret` via operator+= and returns `ret` by value.
3455 template <typename TAlloc>
3457 {
3459  ret += right;
3460  return ret;
3461 }
3462 
3463 
3465 template <typename TAlloc>
3466 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
3467 {
3468  ustring16<TAlloc> ret(left);
3469  ret += core::stringc(right);
3470  return ret;
3471 }
3472 
3473 
3475 template <typename TAlloc>
3476 inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
3477 {
3479  ret += right;
3480  return ret;
3481 }
3482 
3483 
// NOTE(review): original line 3486 (the function declaration) is absent from this listing.
// Presumably the (const ustring16<TAlloc>& left, const long right) overload, by its position
// directly before the `unsigned long` pair - confirm upstream.
// Visible body: copies `left` into `ret`, converts `right` through core::stringc, appends that
// via operator+=, and returns `ret` by value.
3485 template <typename TAlloc>
3487 {
3488  ustring16<TAlloc> ret(left);
3489  ret += core::stringc(right);
3490  return ret;
3491 }
3492 
3493 
// NOTE(review): original lines 3496 (the function declaration) and 3498 (the statement that
// declares `ret`) are absent from this listing, so the block is not compilable as shown.
// Presumably the (const long left, const ustring16<TAlloc>& right) overload with `ret` built
// from core::stringc(left), like the rvalue `long` overload further down - confirm upstream.
// Visible body: appends `right` to `ret` via operator+= and returns `ret` by value.
3495 template <typename TAlloc>
3497 {
3499  ret += right;
3500  return ret;
3501 }
3502 
3503 
3505 template <typename TAlloc>
3506 inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
3507 {
3508  ustring16<TAlloc> ret(left);
3509  ret += core::stringc(right);
3510  return ret;
3511 }
3512 
3513 
3515 template <typename TAlloc>
3516 inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
3517 {
3519  ret += right;
3520  return ret;
3521 }
3522 
3523 
// NOTE(review): original line 3526 (the function declaration) is absent from this listing.
// Presumably the (const ustring16<TAlloc>& left, const float right) overload - the rvalue
// overloads further down list float after unsigned long - confirm upstream.
// Visible body: copies `left` into `ret`, converts `right` through core::stringc, appends that
// via operator+=, and returns `ret` by value.
3525 template <typename TAlloc>
3527 {
3528  ustring16<TAlloc> ret(left);
3529  ret += core::stringc(right);
3530  return ret;
3531 }
3532 
3533 
// NOTE(review): original lines 3536 (the function declaration) and 3538 (the statement that
// declares `ret`) are absent from this listing, so the block is not compilable as shown.
// Presumably the (const float left, const ustring16<TAlloc>& right) overload with `ret` built
// from core::stringc(left), like the rvalue `float` overload further down - confirm upstream.
// Visible body: appends `right` to `ret` via operator+= and returns `ret` by value.
3535 template <typename TAlloc>
3537 {
3539  ret += right;
3540  return ret;
3541 }
3542 
3543 
// NOTE(review): original line 3546 (the function declaration) is absent from this listing.
// Presumably the (const ustring16<TAlloc>& left, const double right) overload - the rvalue
// overloads further down end with the double pair - confirm upstream.
// Visible body: copies `left` into `ret`, converts `right` through core::stringc, appends that
// via operator+=, and returns `ret` by value.
3545 template <typename TAlloc>
3547 {
3548  ustring16<TAlloc> ret(left);
3549  ret += core::stringc(right);
3550  return ret;
3551 }
3552 
3553 
// NOTE(review): original lines 3556 (the function declaration) and 3558 (the statement that
// declares `ret`) are absent from this listing, so the block is not compilable as shown.
// Presumably the (const double left, const ustring16<TAlloc>& right) overload with `ret` built
// from core::stringc(left), like the rvalue `double` overload further down - confirm upstream.
// Visible body: appends `right` to `ret` via operator+= and returns `ret` by value.
3555 template <typename TAlloc>
3557 {
3559  ret += right;
3560  return ret;
3561 }
3562 
3563 
3564 #ifdef USTRING_CPP0X
3565 template <typename TAlloc>
3567 inline ustring16<TAlloc>&& operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
3568 {
3569  //std::cout << "MOVE operator+(&, &&)" << std::endl;
3570  right.insert(left, 0);
3571  return std::move(right);
3572 }
3573 
3574 
3576 template <typename TAlloc>
3577 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
3578 {
3579  //std::cout << "MOVE operator+(&&, &)" << std::endl;
3580  left.append(right);
3581  return std::move(left);
3582 }
3583 
3584 
3586 template <typename TAlloc>
3587 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
3588 {
3589  //std::cout << "MOVE operator+(&&, &&)" << std::endl;
3590  if ((right.size_raw() <= left.capacity() - left.size_raw()) ||
3591  (right.capacity() - right.size_raw() < left.size_raw()))
3592  {
3593  left.append(right);
3594  return std::move(left);
3595  }
3596  else
3597  {
3598  right.insert(left, 0);
3599  return std::move(right);
3600  }
3601 }
3602 
3603 
3605 template <typename TAlloc, class B>
3606 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const B* const right)
3607 {
3608  //std::cout << "MOVE operator+(&&, B*)" << std::endl;
3609  left.append(right);
3610  return std::move(left);
3611 }
3612 
3613 
3615 template <class B, typename TAlloc>
3616 inline ustring16<TAlloc>&& operator+(const B* const left, ustring16<TAlloc>&& right)
3617 {
3618  //std::cout << "MOVE operator+(B*, &&)" << std::endl;
3619  right.insert(left, 0);
3620  return std::move(right);
3621 }
3622 
3623 
3625 template <typename TAlloc, typename B, typename BAlloc>
3626 inline ustring16<TAlloc>&& operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
3627 {
3628  //std::cout << "MOVE operator+(&, &&)" << std::endl;
3629  right.insert(left, 0);
3630  return std::move(right);
3631 }
3632 
3633 
3635 template <typename TAlloc, typename B, typename BAlloc>
3636 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
3637 {
3638  //std::cout << "MOVE operator+(&&, &)" << std::endl;
3639  left.append(right);
3640  return std::move(left);
3641 }
3642 
3643 
3645 template <typename TAlloc, typename B, typename A, typename BAlloc>
3646 inline ustring16<TAlloc>&& operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
3647 {
3648  //std::cout << "MOVE operator+(&, &&)" << std::endl;
3649  right.insert(core::ustring16<TAlloc>(left), 0);
3650  return std::move(right);
3651 }
3652 
3653 
3655 template <typename TAlloc, typename B, typename A, typename BAlloc>
3656 inline ustring16<TAlloc>&& operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
3657 {
3658  //std::cout << "MOVE operator+(&&, &)" << std::endl;
3659  left.append(right);
3660  return std::move(left);
3661 }
3662 
3663 
3665 template <typename TAlloc>
3666 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
3667 {
3668  left.append((uchar32_t)right);
3669  return std::move(left);
3670 }
3671 
3672 
3674 template <typename TAlloc>
3675 inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
3676 {
3677  right.insert((uchar32_t)left, 0);
3678  return std::move(right);
3679 }
3680 
3681 
3682 #ifdef USTRING_CPP0X_NEWLITERALS
3683 template <typename TAlloc>
3685 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
3686 {
3687  left.append(right);
3688  return std::move(left);
3689 }
3690 
3691 
3693 template <typename TAlloc>
3694 inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
3695 {
3696  right.insert(left, 0);
3697  return std::move(right);
3698 }
3699 #endif
3700 
3701 
3703 template <typename TAlloc>
3704 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
3705 {
3706  left.append(core::stringc(right));
3707  return std::move(left);
3708 }
3709 
3710 
3712 template <typename TAlloc>
3713 inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
3714 {
3715  right.insert(core::stringc(left), 0);
3716  return std::move(right);
3717 }
3718 
3719 
3721 template <typename TAlloc>
3722 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
3723 {
3724  left.append(core::stringc(right));
3725  return std::move(left);
3726 }
3727 
3728 
3730 template <typename TAlloc>
3731 inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
3732 {
3733  right.insert(core::stringc(left), 0);
3734  return std::move(right);
3735 }
3736 
3737 
3739 template <typename TAlloc>
3740 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
3741 {
3742  left.append(core::stringc(right));
3743  return std::move(left);
3744 }
3745 
3746 
3748 template <typename TAlloc>
3749 inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
3750 {
3751  right.insert(core::stringc(left), 0);
3752  return std::move(right);
3753 }
3754 
3755 
3757 template <typename TAlloc>
3758 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
3759 {
3760  left.append(core::stringc(right));
3761  return std::move(left);
3762 }
3763 
3764 
3766 template <typename TAlloc>
3767 inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
3768 {
3769  right.insert(core::stringc(left), 0);
3770  return std::move(right);
3771 }
3772 
3773 
3775 template <typename TAlloc>
3776 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
3777 {
3778  left.append(core::stringc(right));
3779  return std::move(left);
3780 }
3781 
3782 
3784 template <typename TAlloc>
3785 inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
3786 {
3787  right.insert(core::stringc(left), 0);
3788  return std::move(right);
3789 }
3790 
3791 
3793 template <typename TAlloc>
3794 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
3795 {
3796  left.append(core::stringc(right));
3797  return std::move(left);
3798 }
3799 
3800 
3802 template <typename TAlloc>
3803 inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
3804 {
3805  right.insert(core::stringc(left), 0);
3806  return std::move(right);
3807 }
3808 
3809 
3811 template <typename TAlloc>
3812 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
3813 {
3814  left.append(core::stringc(right));
3815  return std::move(left);
3816 }
3817 
3818 
3820 template <typename TAlloc>
3821 inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
3822 {
3823  right.insert(core::stringc(left), 0);
3824  return std::move(right);
3825 }
3826 
3827 
3829 template <typename TAlloc>
3830 inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
3831 {
3832  left.append(core::stringc(right));
3833  return std::move(left);
3834 }
3835 
3836 
3838 template <typename TAlloc>
3839 inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
3840 {
3841  right.insert(core::stringc(left), 0);
3842  return std::move(right);
3843 }
3844 #endif
3845 
3846 
3847 #ifndef USTRING_NO_STL
3848 template <typename TAlloc>
3850 inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
3851 {
3852  out << in.toUTF8_s().c_str();
3853  return out;
3854 }
3855 
3857 template <typename TAlloc>
3858 inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
3859 {
3860  out << in.toWCHAR_s().c_str();
3861  return out;
3862 }
3863 #endif
3864 
3865 
3866 #ifndef USTRING_NO_STL
3867 
3868 namespace unicode
3869 {
3870 
3873 class hash : public std::unary_function<core::ustring, size_t>
3874 {
3875  public:
3876  size_t operator()(const core::ustring& s) const
3877  {
3878  size_t ret = 2166136261U;
3879  size_t index = 0;
3880  size_t stride = 1 + s.size_raw() / 10;
3881 
3882  core::ustring::const_iterator i = s.begin();
3883  while (i != s.end())
3884  {
3885  ret = 16777619U * ret ^ (size_t)s[index];
3886  index += stride;
3887  i += stride;
3888  }
3889  return (ret);
3890  }
3891 };
3892 
3893 } // end namespace unicode
3894 
3895 #endif
3896 
3897 } // end namespace core
3898 } // end namespace irr
3899 
3900 #endif
ustring16< TAlloc > & insert_raw(uchar16_t c, size_t pos)
Definition: irrUString.h:2697
ustring16(const uchar32_t *const c, size_t length)
Constructor for copying a UTF-32 from a pointer with a given length.
Definition: irrUString.h:1002
void set_used(u32 usedNow)
Sets the size of the array and allocates new elements if necessary.
Definition: irrArray.h:253
u16 uchar16_t
Definition: irrUString.h:86
_Iter operator++(int)
Switch to the next full character in the string, returning the previous position.
Definition: irrUString.h:522
ustring16< TAlloc > & append(const uchar16_t *const other, size_t length=0xffffffff)
Definition: irrUString.h:1567
const u8 BOM_ENCODE_UTF32_LEN
Definition: irrUString.h:146
GLdouble GLdouble t
Definition: SDL_opengl.h:2071
bool operator !=(const ustring16< TAlloc > &other) const
Inequality operator.
Definition: irrUString.h:1283
ustring16< irrAllocator< uchar16_t > > ustring
Definition: irrUString.h:3289
GLdouble n
const GLint * first
u32 size() const
Returns length of the string's content.
Definition: irrString.h:502
_ustring16_const_iterator(const _Iter &i)
Constructors.
Definition: irrUString.h:473
const u8 BOM_ENCODE_UTF8[3]
Unicode byte order marks for file operations.
Definition: irrUString.h:137
ustring16< TAlloc > & trim(const ustring16< TAlloc > &whitespace=" \t\n\r")
Definition: irrUString.h:2427
array()
Default constructor for empty array.
Definition: irrArray.h:28
ustring16(const uchar8_t *const c, size_t length)
Constructor for copying a UTF-8 string from a pointer with a given length.
Definition: irrUString.h:946
GLsizei const GLchar *const * string
access operator [](const size_t index)
Direct access operator.
Definition: irrUString.h:1218
_ustring16_iterator_access & operator=(const uchar32_t c)
Definition: irrUString.h:259
_ustring16_iterator_access & operator--()
Definition: irrUString.h:284
UTF-16 string class.
Definition: irrUString.h:236
const_iterator end() const
Definition: irrUString.h:2778
bool operator==(const uchar16_t *const str) const
Equality operator.
Definition: irrUString.h:1236
const u8 BOM_UTF16_LEN
Definition: irrUString.h:133
string< T, TAlloc > & append(T character)
Appends a character to this string.
Definition: irrString.h:634
ustring16(const uchar8_t *const c)
Constructor for copying a UTF-8 string from a pointer.
Definition: irrUString.h:918
ustring16(const wchar_t *const c)
Constructor for copying a wchar_t string from a pointer.
Definition: irrUString.h:1016
GLuint GLuint GLsizei count
Definition: SDL_opengl.h:1571
const access operator [](const size_t index) const
Direct access operator.
Definition: irrUString.h:1227
_Iter operator--(int)
Switch to the previous full character in the string, returning the previous position.
Definition: irrUString.h:541
bool atStart() const
Is the iterator at the start of the string?
Definition: irrUString.h:690
const unicode::EUTF_ENDIAN getEndianness() const
Definition: irrUString.h:3247
ustring16< TAlloc >::_ustring16_iterator iterator
Definition: irrUString.h:797
#define UTF16_IS_SURROGATE_LO(c)
Definition: irrUString.h:73
size_t size() const
Definition: irrUString.h:1291
const u8 BOM_ENCODE_UTF16_LEN
Definition: irrUString.h:145
ustring16(const char *const c, size_t length)
Constructor for copying a character string from a pointer with a given length.
Definition: irrUString.h:903
size_t find_raw(const ustring16< TAlloc > &str, const size_t start=0) const
Definition: irrUString.h:1994
core::array< uchar16_t > toUTF16(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Definition: irrUString.h:2956
const_iterator cend() const
Definition: irrUString.h:2788
void toStart()
Moves the iterator to the start of the string.
Definition: irrUString.h:705
GLuint GLuint end
Definition: SDL_opengl.h:1571
bool operator!=(const _Iter &iter) const
Test for inequality.
Definition: irrUString.h:502
GLfloat GLfloat p
bool atEnd() const
Is the iterator at the end of the string?
Definition: irrUString.h:696
const T * const_pointer() const
Gets a const pointer to the array.
Definition: irrArray.h:356
Iterator to iterate through a UTF-16 string.
Definition: irrUString.h:448
#define COPY_ARRAY(source, size)
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: SDL_opengl.h:1974
bool operator<(const ustring16< TAlloc > &other) const
Is smaller comparator.
Definition: irrUString.h:1262
Iterator to iterate through a UTF-16 string.
Definition: irrUString.h:730
ustring16< TAlloc > & append(const ustring16< TAlloc > &other)
Definition: irrUString.h:1708
_Iter & operator++()
Switch to the next full character in the string.
Definition: irrUString.h:510
const_reference operator *() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:646
static const size_t npos
Definition: irrUString.h:808
_Iter & operator--()
Switch to the previous full character in the string.
Definition: irrUString.h:530
ustring16< TAlloc > & replace_raw(uchar16_t c, size_t pos)
Definition: irrUString.h:2732
size_t findLastChar(const uchar32_t *const c, size_t count=1) const
Definition: irrUString.h:1929
const unicode::EUTF_ENCODE getEncoding() const
Definition: irrUString.h:3240
GLuint start
Definition: SDL_opengl.h:1571
ustring16< TAlloc > & loadDataStream(const char *data, size_t data_size)
Definition: irrUString.h:3207
core::array< uchar8_t > toUTF8(const bool addBOM=false) const
Definition: irrUString.h:2856
#define UTF16_IS_SURROGATE_HI(c)
Definition: irrUString.h:72
core::string< uchar8_t > toUTF8_s(const bool addBOM=false) const
Definition: irrUString.h:2799
core::array< wchar_t > toWCHAR(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a wchar_t encoded string array.
Definition: irrUString.h:3148
Everything in the Irrlicht Engine can be found in this namespace.
Definition: CARSADPad.h:6
ustring16(const wchar_t *const c, size_t length)
Constructor for copying a wchar_t string from a pointer with a given length.
Definition: irrUString.h:1035
const u16 BOM
The Unicode byte order mark.
Definition: irrUString.h:129
uchar32_t swapEndian32(const uchar32_t &c)
Definition: irrUString.h:120
core::string< wchar_t > toWCHAR_s(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Converts the string to a wchar_t encoded string.
Definition: irrUString.h:3108
GLsizeiptr size
ustring16< TAlloc > & removeChars(const ustring16< TAlloc > &characters)
Definition: irrUString.h:2377
GLenum GLsizei len
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
Very simple string class with some useful features.
Definition: irrString.h:37
GLsizei const void * pointer
GLsizei GLsizei GLfloat distance
size_t find(const ustring16< TAlloc > &str, const size_t start=0) const
Definition: irrUString.h:1956
void push_back(const T &element)
Adds an element at back of array.
Definition: irrArray.h:111
unsigned char u8
8 bit unsigned variable.
Definition: irrTypes.h:22
ustring16< TAlloc > & operator=(const string< B, A > &other)
Assignment operator for other string types.
Definition: irrUString.h:1123
const_pointer operator->() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:678
ustring16< TAlloc > & operator=(const wchar_t *const c)
Assignment operator for wchar_t strings.
Definition: irrUString.h:1188
EUTF_ENCODE
Unicode encoding type.
Definition: irrUString.h:149
_Base::difference_type difference_type
Definition: irrUString.h:739
EUTF_ENCODE determineUnicodeBOM(const char *data)
Definition: irrUString.h:221
size_t findFirst(uchar32_t c) const
Definition: irrUString.h:1773
void reserve(u32 count)
Reserves some memory.
Definition: irrString.h:762
unsigned short u16
16 bit unsigned variable.
Definition: irrTypes.h:44
const GLdouble * v
Definition: SDL_opengl.h:2064
GLsizei stride
_ustring16_const_iterator(const ustring16< TAlloc > &s)
Definition: irrUString.h:474
ustring16< TAlloc > & validate()
Definition: irrUString.h:2468
const u8 BOM_ENCODE_UTF32_LE[4]
Definition: irrUString.h:141
ustring16(const uchar16_t *const c, size_t length)
Constructor for copying a UTF-16 string from a pointer with a given length.
Definition: irrUString.h:974
core::array< uchar32_t > toUTF32(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Definition: irrUString.h:3053
ustring16< TAlloc >::_ustring16_iterator_access access
Definition: irrUString.h:435
pointer operator->() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:785
size_t split(container &ret, const ustring16< TAlloc > &c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
Split the ustring16 into parts.
Definition: irrUString.h:2605
_ustring16_iterator_access & operator%=(int val)
Definition: irrUString.h:338
_ustring16_iterator(const ustring16< TAlloc > &s)
Definition: irrUString.h:749
_ustring16_iterator_access & operator-=(int val)
Definition: irrUString.h:311
GLuint GLfloat * val
bool equalsn(const uchar16_t *const str, size_t n) const
Definition: irrUString.h:1342
ustring16< TAlloc > & insert(const ustring16< TAlloc > &c, size_t pos)
Definition: irrUString.h:2667
reference operator *() const
Accesses the full character at the iterator's position.
Definition: irrUString.h:753
ustring16< TAlloc > & append(const ustring16< TAlloc > &other, size_t length)
Definition: irrUString.h:1731
const_iterator cbegin() const
Definition: irrUString.h:2759
ustring16< TAlloc > & append(uchar32_t character)
Definition: irrUString.h:1360
size_t size_raw() const
Definition: irrUString.h:2622
const_iterator begin() const
Definition: irrUString.h:2750
const T * c_str() const
Returns character string.
Definition: irrString.h:526
ustring16< TAlloc > & erase_raw(size_t pos)
Definition: irrUString.h:2716
const u8 BOM_ENCODE_UTF16_BE[2]
Definition: irrUString.h:138
_Iter operator-(const difference_type v) const
Return a new iterator that is a variable number of full characters backward from the current position...
Definition: irrUString.h:608
uchar16_t swapEndian16(const uchar16_t &c)
Definition: irrUString.h:113
unsigned int u32
32 bit unsigned variable.
Definition: irrTypes.h:62
ustring16(const ustring16< TAlloc > &other)
Constructor.
Definition: irrUString.h:826
size_t findFirstChar(const uchar32_t *const c, size_t count=1) const
Definition: irrUString.h:1794
ustring16(const string< B, A > &other)
Constructor from other string types.
Definition: irrUString.h:840
GLenum mode
u8 uchar8_t
Definition: irrUString.h:87
_ustring16_iterator_access(const ustring16< TAlloc > *s, size_t p)
Definition: irrUString.h:248
#define _IRR_DEBUG_BREAK_IF(_CONDITION_)
define a break macro for debugging.
Definition: irrTypes.h:185
size_t capacity() const
Definition: irrUString.h:2614
core::array< u8 > getUnicodeBOM(EUTF_ENCODE mode)
Definition: irrUString.h:174
pointer operator->()
Accesses the full character at the iterator's position.
Definition: irrUString.h:791
pointer operator->()
Accesses the full character at the iterator's position.
Definition: irrUString.h:684
ustring16< TAlloc > & insert(uchar32_t c, size_t pos)
Definition: irrUString.h:2632
string< c8 > stringc
Typedef for character strings.
Definition: irrString.h:1458
_ustring16_iterator(const ustring16< TAlloc > &s, const size_t p)
Definition: irrUString.h:750
size_t split(container &ret, const uchar32_t *const c, size_t count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
Split the ustring16 into parts.
Definition: irrUString.h:2548
ustring16()
Default constructor.
Definition: irrUString.h:812
_ustring16_iterator_access & operator *=(int val)
Definition: irrUString.h:320
GLuint index
const u8 BOM_UTF8_LEN
The size of the Unicode byte order mark in terms of the Unicode character size.
Definition: irrUString.h:132
size_t findLastCharNotInList(const uchar32_t *const c, size_t count=1) const
Definition: irrUString.h:1849
bool equalsn(const ustring16< TAlloc > &other, size_t n) const
Definition: irrUString.h:1324
ustring16(const char c)
Constructor for copying a UTF-8 string from a single char.
Definition: irrUString.h:932
ustring16< TAlloc > & operator=(const uchar32_t *const c)
Assignment operator for UTF-32 strings.
Definition: irrUString.h:1167
bool empty() const
Definition: irrUString.h:1306
uchar32_t toUTF32(uchar16_t high, uchar16_t low)
Definition: irrUString.h:103
ustring16< TAlloc > & erase(size_t index)
Definition: irrUString.h:2447
size_t findLast(uchar32_t c, size_t start=ustring16< TAlloc >::npos) const
Definition: irrUString.h:1905
Self reallocating template array (like stl vector) with additional features.
Definition: irrArray.h:22
io::path toPATH_s(const unicode::EUTF_ENDIAN endian=unicode::EUTFEE_NATIVE, const bool addBOM=false) const
Definition: irrUString.h:3193
GLint left
GLdouble s
Definition: SDL_opengl.h:2063
_ustring16_iterator_access & operator++()
Definition: irrUString.h:267
GLint GLint GLint GLint GLint x
Definition: SDL_opengl.h:1574
EUTF_ENDIAN
Unicode endianness.
Definition: irrUString.h:162
const irr::u16 UTF_REPLACEMENT_CHARACTER
The unicode replacement character. Used to replace invalid characters.
Definition: irrUString.h:97
ustring16< TAlloc > & operator=(const B *const c)
Assignment operator for other strings.
Definition: irrUString.h:1204
ustring16< TAlloc > & append(const uchar32_t *const other, size_t length=0xffffffff)
Definition: irrUString.h:1632
ustring16< TAlloc > & replace(const ustring16< TAlloc > &toReplace, const ustring16< TAlloc > &replaceWith)
Definition: irrUString.h:2195
_Iter & operator-=(const difference_type v)
Definition: irrUString.h:577
ustring16< TAlloc >::_ustring16_const_iterator const_iterator
Definition: irrUString.h:798
size_t findNext(uchar32_t c, size_t startPos) const
Definition: irrUString.h:1883
ustring16(const uchar16_t *const c)
Constructor for copying a UTF-16 string from a pointer.
Definition: irrUString.h:960
ustring16< TAlloc > & replace(uchar32_t toReplace, uchar32_t replaceWith)
Definition: irrUString.h:2177
const u8 BOM_ENCODE_UTF32_BE[4]
Definition: irrUString.h:140
ustring16< TAlloc > & operator=(const uchar8_t *const c)
Assignment operator for UTF-8 strings.
Definition: irrUString.h:1131
GLenum array
bool operator !=(const uchar16_t *const str) const
Inequality operator.
Definition: irrUString.h:1276
ustring16< TAlloc > & remove(const ustring16< TAlloc > &toRemove)
Definition: irrUString.h:2342
_ustring16_const_iterator(const ustring16< TAlloc > &s, const size_t p)
Definition: irrUString.h:475
GLdouble GLdouble right
GLboolean GLboolean GLboolean GLboolean a
difference_type operator-(const _Iter &iter) const
Returns the distance between two iterators.
Definition: irrUString.h:616
~ustring16()
Destructor.
Definition: irrUString.h:1067
_ustring16_iterator_access & operator+=(int val)
Definition: irrUString.h:302
_Iter operator+(const difference_type v) const
Return a new iterator that is a variable number of full characters forward from the current position.
Definition: irrUString.h:600
ustring16< TAlloc > & operator=(const uchar16_t *const c)
Assignment operator for UTF-16 strings.
Definition: irrUString.h:1149
_ustring16_iterator(const _Iter &i)
Constructors.
Definition: irrUString.h:748
uchar32_t lastChar() const
Definition: irrUString.h:2504
size_t findFirstCharNotInList(const uchar32_t *const c, size_t count=1) const
Definition: irrUString.h:1820
bool operator==(const _Iter &iter) const
Test for equality.
Definition: irrUString.h:494
GLenum GLint ref
GLuint GLsizei GLsizei * length
ustring16(const uchar32_t *const c)
Constructor for copying a UTF-32 string from a pointer.
Definition: irrUString.h:988
ustring16< TAlloc > subString(size_t begin, size_t length) const
Definition: irrUString.h:2027
const GLubyte * c
GLuint in
ustring16 & operator=(const ustring16< TAlloc > &other)
Assignment operator.
Definition: irrUString.h:1074
_Iter & operator+=(const difference_type v)
Definition: irrUString.h:550
_ustring16_iterator_access & operator/=(int val)
Definition: irrUString.h:329
const u8 BOM_ENCODE_UTF16_LE[2]
Definition: irrUString.h:139
ustring16< TAlloc > & append(const uchar8_t *const other, size_t length=0xffffffff)
Definition: irrUString.h:1391
Access an element in a unicode string, allowing one to change it.
Definition: irrUString.h:245
_Base::const_reference const_reference
Definition: irrUString.h:736
ustring16< TAlloc > & operator+=(char c)
Definition: irrUString.h:2064
T * pointer()
Gets a pointer to the array.
Definition: irrArray.h:348
u32 uchar32_t
Definition: irrUString.h:85
void reserve(size_t count)
Definition: irrUString.h:1761
const u8 BOM_ENCODE_UTF8_LEN
The size in bytes of the Unicode byte marks for file operations.
Definition: irrUString.h:144
const uchar16_t * c_str() const
Definition: irrUString.h:1314
_ustring16_const_iterator _Base
Definition: irrUString.h:734
ustring16< TAlloc > & remove(uchar32_t c)
Definition: irrUString.h:2306
const u8 BOM_UTF32_LEN
Definition: irrUString.h:134
#define UTF16_IS_SURROGATE(c)
Is a UTF-16 code point a surrogate?
Definition: irrUString.h:71
ustring16< TAlloc > operator+(const ustring16< TAlloc > &left, const ustring16< TAlloc > &right)
Appends two ustring16s.
Definition: irrUString.h:3294
unsigned int size_t
void toEnd()
Moves the iterator to the end of the string.
Definition: irrUString.h:711
string< wchar_t > stringw
Typedef for wide character strings.
Definition: irrString.h:1461