arsa  2.7
Enumerations | Functions | Variables
irr::core::unicode Namespace Reference

Enumerations

enum  EUTF_ENCODE {
  EUTFE_NONE = 0, EUTFE_UTF8, EUTFE_UTF16, EUTFE_UTF16_LE,
  EUTFE_UTF16_BE, EUTFE_UTF32, EUTFE_UTF32_LE, EUTFE_UTF32_BE
}
 Unicode encoding type. More...
 
enum  EUTF_ENDIAN { EUTFEE_NATIVE = 0, EUTFEE_LITTLE, EUTFEE_BIG }
 Unicode endianness. More...
 

Functions

uchar32_t toUTF32 (uchar16_t high, uchar16_t low)
 
uchar16_t swapEndian16 (const uchar16_t &c)
 
uchar32_t swapEndian32 (const uchar32_t &c)
 
core::array< u8 > getUnicodeBOM (EUTF_ENCODE mode)
 
EUTF_ENCODE determineUnicodeBOM (const char *data)
 

Variables

const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD
 The unicode replacement character. Used to replace invalid characters. More...
 
const u16 BOM = 0xFEFF
 The Unicode byte order mark. More...
 
const u8 BOM_UTF8_LEN = 3
 The size of the Unicode byte order mark in terms of the Unicode character size. More...
 
const u8 BOM_UTF16_LEN = 1
 
const u8 BOM_UTF32_LEN = 1
 
const u8 BOM_ENCODE_UTF8 [3] = { 0xEF, 0xBB, 0xBF }
 Unicode byte order marks for file operations. More...
 
const u8 BOM_ENCODE_UTF16_BE [2] = { 0xFE, 0xFF }
 
const u8 BOM_ENCODE_UTF16_LE [2] = { 0xFF, 0xFE }
 
const u8 BOM_ENCODE_UTF32_BE [4] = { 0x00, 0x00, 0xFE, 0xFF }
 
const u8 BOM_ENCODE_UTF32_LE [4] = { 0xFF, 0xFE, 0x00, 0x00 }
 
const u8 BOM_ENCODE_UTF8_LEN = 3
 The size in bytes of the Unicode byte order marks for file operations. More...
 
const u8 BOM_ENCODE_UTF16_LEN = 2
 
const u8 BOM_ENCODE_UTF32_LEN = 4
 

Enumeration Type Documentation

◆ EUTF_ENCODE

Unicode encoding type.

Enumerator
EUTFE_NONE 
EUTFE_UTF8 
EUTFE_UTF16 
EUTFE_UTF16_LE 
EUTFE_UTF16_BE 
EUTFE_UTF32 
EUTFE_UTF32_LE 
EUTFE_UTF32_BE 

Definition at line 149 of file irrUString.h.

◆ EUTF_ENDIAN

Unicode endianness.

Enumerator
EUTFEE_NATIVE 
EUTFEE_LITTLE 
EUTFEE_BIG 

Definition at line 162 of file irrUString.h.

Function Documentation

◆ determineUnicodeBOM()

EUTF_ENCODE irr::core::unicode::determineUnicodeBOM ( const char *  data)
inline

Detects if the given data stream starts with a unicode BOM.

Parameters
data    The data stream to check.
Returns
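The unicode BOM associated with the data stream, or EUTFE_NONE if none was found. Note that the UTF-16 LE mark (FF FE) is tested before the UTF-32 LE mark (FF FE 00 00), so data beginning with a UTF-32 LE BOM is reported as EUTFE_UTF16_LE. The checks may read up to 4 bytes from data.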

Definition at line 221 of file irrUString.h.

222 {
223  if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
224  if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
225  if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
226  if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
227  if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
228  return EUTFE_NONE;
229 }
const u8 BOM_ENCODE_UTF8[3]
Unicode byte order marks for file operations.
Definition: irrUString.h:137
GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * data
Definition: SDL_opengl.h:1974
const u8 BOM_ENCODE_UTF32_LE[4]
Definition: irrUString.h:141
const u8 BOM_ENCODE_UTF16_BE[2]
Definition: irrUString.h:138
const u8 BOM_ENCODE_UTF32_BE[4]
Definition: irrUString.h:140
const u8 BOM_ENCODE_UTF16_LE[2]
Definition: irrUString.h:139

◆ getUnicodeBOM()

core::array<u8> irr::core::unicode::getUnicodeBOM ( EUTF_ENCODE  mode)
inline

Returns the specified unicode byte order mark in a byte array. The byte order mark is the first few bytes in a text file that signifies its encoding.

Parameters
mode    The Unicode encoding method that we want to get the byte order mark for. If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness.
Returns
An array that contains a byte order mark.

Definition at line 174 of file irrUString.h.

175 {
176 #define COPY_ARRAY(source, size) \
177  memcpy(ret.pointer(), source, size); \
178  ret.set_used(size)
179 
180  core::array<u8> ret(4);
181  switch (mode)
182  {
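183  case EUTFE_UTF8:
184  COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
185  break;
186  case EUTFE_UTF16:
187  #ifdef __BIG_ENDIAN__
188  COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
189  #else
190  COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
191  #endif
192  break;
193  case EUTFE_UTF16_BE:
194  COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
195  break;
196  case EUTFE_UTF16_LE:
197  COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
198  break;
199  case EUTFE_UTF32:
200  #ifdef __BIG_ENDIAN__
201  COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
202  #else
203  COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
204  #endif
205  break;
206  case EUTFE_UTF32_BE:
207  COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
208  break;
209  case EUTFE_UTF32_LE:
210  COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
211  break;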
212  }
213  return ret;
214 
215 #undef COPY_ARRAY
216 }
const u8 BOM_ENCODE_UTF32_LEN
Definition: irrUString.h:146
const u8 BOM_ENCODE_UTF8[3]
Unicode byte order marks for file operations.
Definition: irrUString.h:137
const u8 BOM_ENCODE_UTF16_LEN
Definition: irrUString.h:145
#define COPY_ARRAY(source, size)
const u8 BOM_ENCODE_UTF32_LE[4]
Definition: irrUString.h:141
const u8 BOM_ENCODE_UTF16_BE[2]
Definition: irrUString.h:138
GLenum mode
const u8 BOM_ENCODE_UTF32_BE[4]
Definition: irrUString.h:140
const u8 BOM_ENCODE_UTF16_LE[2]
Definition: irrUString.h:139
const u8 BOM_ENCODE_UTF8_LEN
The size in bytes of the Unicode byte order marks for file operations.
Definition: irrUString.h:144

◆ swapEndian16()

uchar16_t irr::core::unicode::swapEndian16 ( const uchar16_t &  c)
inline

Swaps the endianness of a 16-bit value.

Returns
The new value.

Definition at line 113 of file irrUString.h.

114 {
115  return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
116 }
const GLubyte * c

◆ swapEndian32()

uchar32_t irr::core::unicode::swapEndian32 ( const uchar32_t &  c)
inline

Swaps the endianness of a 32-bit value.

Returns
The new value.

Definition at line 120 of file irrUString.h.

121 {
122  return ((c >> 24) & 0x000000FF) |
123  ((c >> 8) & 0x0000FF00) |
124  ((c << 8) & 0x00FF0000) |
125  ((c << 24) & 0xFF000000);
126 }
const GLubyte * c

◆ toUTF32()

uchar32_t irr::core::unicode::toUTF32 ( uchar16_t  high,
uchar16_t  low 
)
inline

Convert a UTF-16 surrogate pair into a UTF-32 character.

Parameters
high    The high value of the pair.
low     The low value of the pair.
Returns
The UTF-32 character expressed by the surrogate pair.

Definition at line 103 of file irrUString.h.

104 {
105  // Convert the surrogate pair into a single UTF-32 character.
106  uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
107  uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
108  return (wu << 16) | x;
109 }
GLint GLint GLint GLint GLint x
Definition: SDL_opengl.h:1574
u32 uchar32_t
Definition: irrUString.h:85

Variable Documentation

◆ BOM

const u16 irr::core::unicode::BOM = 0xFEFF

The Unicode byte order mark.

Definition at line 129 of file irrUString.h.

◆ BOM_ENCODE_UTF16_BE

const u8 irr::core::unicode::BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF }

Definition at line 138 of file irrUString.h.

◆ BOM_ENCODE_UTF16_LE

const u8 irr::core::unicode::BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE }

Definition at line 139 of file irrUString.h.

◆ BOM_ENCODE_UTF16_LEN

const u8 irr::core::unicode::BOM_ENCODE_UTF16_LEN = 2

Definition at line 145 of file irrUString.h.

◆ BOM_ENCODE_UTF32_BE

const u8 irr::core::unicode::BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF }

Definition at line 140 of file irrUString.h.

◆ BOM_ENCODE_UTF32_LE

const u8 irr::core::unicode::BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 }

Definition at line 141 of file irrUString.h.

◆ BOM_ENCODE_UTF32_LEN

const u8 irr::core::unicode::BOM_ENCODE_UTF32_LEN = 4

Definition at line 146 of file irrUString.h.

◆ BOM_ENCODE_UTF8

const u8 irr::core::unicode::BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF }

Unicode byte order marks for file operations.

Definition at line 137 of file irrUString.h.

◆ BOM_ENCODE_UTF8_LEN

const u8 irr::core::unicode::BOM_ENCODE_UTF8_LEN = 3

The size in bytes of the Unicode byte order marks for file operations.

Definition at line 144 of file irrUString.h.

◆ BOM_UTF16_LEN

const u8 irr::core::unicode::BOM_UTF16_LEN = 1

Definition at line 133 of file irrUString.h.

◆ BOM_UTF32_LEN

const u8 irr::core::unicode::BOM_UTF32_LEN = 1

Definition at line 134 of file irrUString.h.

◆ BOM_UTF8_LEN

const u8 irr::core::unicode::BOM_UTF8_LEN = 3

The size of the Unicode byte order mark in terms of the Unicode character size.

Definition at line 132 of file irrUString.h.

◆ UTF_REPLACEMENT_CHARACTER

const irr::u16 irr::core::unicode::UTF_REPLACEMENT_CHARACTER = 0xFFFD

The unicode replacement character. Used to replace invalid characters.

Definition at line 97 of file irrUString.h.