Enumerations
enum	EUTF_ENCODE { EUTFE_NONE = 0, EUTFE_UTF8, EUTFE_UTF16, EUTFE_UTF16_LE, EUTFE_UTF16_BE, EUTFE_UTF32, EUTFE_UTF32_LE, EUTFE_UTF32_BE }
	Unicode encoding type. More...

enum	EUTF_ENDIAN { EUTFEE_NATIVE = 0, EUTFEE_LITTLE, EUTFEE_BIG }
	Unicode endianness. More...

Functions
uchar32_t	toUTF32 (uchar16_t high, uchar16_t low)

uchar16_t	swapEndian16 (const uchar16_t &c)

uchar32_t	swapEndian32 (const uchar32_t &c)

core::array< u8 >	getUnicodeBOM (EUTF_ENCODE mode)

EUTF_ENCODE	determineUnicodeBOM (const char *data)

Variables
const irr::u16	UTF_REPLACEMENT_CHARACTER = 0xFFFD
	The Unicode replacement character. Used to replace invalid characters. More...

const u16	BOM = 0xFEFF
	The Unicode byte order mark. More...

const u8	BOM_UTF8_LEN = 3
	The size of the Unicode byte order mark in terms of the Unicode character size. More...

const u8	BOM_UTF16_LEN = 1

const u8	BOM_UTF32_LEN = 1

const u8	BOM_ENCODE_UTF8 [3] = { 0xEF, 0xBB, 0xBF }
	Unicode byte order marks for file operations. More...

const u8	BOM_ENCODE_UTF16_BE [2] = { 0xFE, 0xFF }

const u8	BOM_ENCODE_UTF16_LE [2] = { 0xFF, 0xFE }

const u8	BOM_ENCODE_UTF32_BE [4] = { 0x00, 0x00, 0xFE, 0xFF }

const u8	BOM_ENCODE_UTF32_LE [4] = { 0xFF, 0xFE, 0x00, 0x00 }

const u8	BOM_ENCODE_UTF8_LEN = 3
	The size in bytes of the Unicode byte order marks for file operations. More...

const u8	BOM_ENCODE_UTF16_LEN = 2

const u8	BOM_ENCODE_UTF32_LEN = 4

Enumeration Type Documentation

◆ EUTF_ENCODE

enum irr::core::unicode::EUTF_ENCODE

Unicode encoding type.

Enumerator
EUTFE_NONE
EUTFE_UTF8
EUTFE_UTF16
EUTFE_UTF16_LE
EUTFE_UTF16_BE
EUTFE_UTF32
EUTFE_UTF32_LE
EUTFE_UTF32_BE

Definition at line 149 of file irrUString.h.

 {
         // No encoding. determineUnicodeBOM() returns this value when the data
         // does not start with any of the known byte order marks.
         EUTFE_NONE              = 0,
         EUTFE_UTF8,
         // UTF-16 with no explicit byte order; getUnicodeBOM() maps it to the
         // native system endianness.
         EUTFE_UTF16,
         EUTFE_UTF16_LE,
         EUTFE_UTF16_BE,
         // UTF-32 with no explicit byte order; getUnicodeBOM() maps it to the
         // native system endianness.
         EUTFE_UTF32,
         EUTFE_UTF32_LE,
         EUTFE_UTF32_BE
 };

◆ EUTF_ENDIAN

enum irr::core::unicode::EUTF_ENDIAN

Unicode endianness.

Enumerator
EUTFEE_NATIVE
EUTFEE_LITTLE
EUTFEE_BIG

Definition at line 162 of file irrUString.h.

 {
         // Presumably "use the byte order of the host system" -- inferred from
         // the name only; confirm against the code that consumes this enum.
         EUTFEE_NATIVE   = 0,
         // Little-endian byte order.
         EUTFEE_LITTLE,
         // Big-endian byte order.
         EUTFEE_BIG
 };

Function Documentation

◆ determineUnicodeBOM()

EUTF_ENCODE irr::core::unicode::determineUnicodeBOM ( const char * data )

inline

Detects whether the given data stream starts with a Unicode byte order mark (BOM).

Parameters

data	The data stream to check.

Returns: The Unicode encoding indicated by the BOM at the start of the data stream, or EUTFE_NONE if none was found.

Definition at line 221 of file irrUString.h.

 {
         if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
         if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
         if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
         if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
         if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
         return EUTFE_NONE;
 }

◆ getUnicodeBOM()

core::array<u8> irr::core::unicode::getUnicodeBOM ( EUTF_ENCODE mode )

inline

Returns the specified Unicode byte order mark in a byte array. The byte order mark is the first few bytes in a text file that signify its encoding.

Parameters

mode	The Unicode encoding method that we want to get the byte order mark for. If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness.

Returns: An array that contains a byte order mark.

Definition at line 174 of file irrUString.h.

 {
 // Helper used only inside this function: copies `size` bytes from `source`
 // into ret's storage, then sets ret's used-element count to `size`.
 #define COPY_ARRAY(source, size) \
         memcpy(ret.pointer(), source, size); \
         ret.set_used(size)
 
         // Constructed with 4, the length of the longest mark (UTF-32).
         // NOTE(review): presumably this pre-allocates 4 elements so the memcpy
         // in COPY_ARRAY has room -- confirm against core::array's constructor.
         core::array<u8> ret(4);
         switch (mode)
         {
                 case EUTFE_UTF8:
                         COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
                         break;
                 // Unqualified UTF-16: big-endian mark when __BIG_ENDIAN__ is
                 // defined, little-endian mark otherwise.
                 case EUTFE_UTF16:
                         #ifdef __BIG_ENDIAN__
                                 COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
                         #else
                                 COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
                         #endif
                         break;
                 case EUTFE_UTF16_BE:
                         COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
                         break;
                 case EUTFE_UTF16_LE:
                         COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
                         break;
                 // Unqualified UTF-32: same __BIG_ENDIAN__ selection as UTF-16.
                 case EUTFE_UTF32:
                         #ifdef __BIG_ENDIAN__
                                 COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
                         #else
                                 COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
                         #endif
                         break;
                 case EUTFE_UTF32_BE:
                         COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
                         break;
                 case EUTFE_UTF32_LE:
                         COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
                         break;
                 // There is no case for EUTFE_NONE: ret is then returned without
                 // COPY_ARRAY having copied anything into it.
         }
         return ret;
 
 // The macro is local to this function; remove it so it cannot leak further.
 #undef COPY_ARRAY
 }

◆ swapEndian16()

uchar16_t irr::core::unicode::swapEndian16 ( const uchar16_t & c )

inline

Swaps the endianness of a 16-bit value.

Returns: The new value.

Definition at line 113 of file irrUString.h.

 {
         return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
 }

◆ swapEndian32()

uchar32_t irr::core::unicode::swapEndian32 ( const uchar32_t & c )

inline

Swaps the endianness of a 32-bit value.

Returns: The new value.

Definition at line 120 of file irrUString.h.

 {
         return  ((c >> 24) & 0x000000FF) |
                         ((c >> 8)  & 0x0000FF00) |
                         ((c << 8)  & 0x00FF0000) |
                         ((c << 24) & 0xFF000000);
 }

◆ toUTF32()

uchar32_t irr::core::unicode::toUTF32	(	uchar16_t	high,
		uchar16_t	low
	)

inline

Convert a UTF-16 surrogate pair into a UTF-32 character.

Parameters

high	The high value of the pair.
low	The low value of the pair.

Returns: The UTF-32 character expressed by the surrogate pair.

Definition at line 103 of file irrUString.h.

 {
         // Convert the surrogate pair into a single UTF-32 character.
         uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
         uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
         return (wu << 16) | x;
 }

Variable Documentation

◆ BOM

const u16 irr::core::unicode::BOM = 0xFEFF

The Unicode byte order mark.

Definition at line 129 of file irrUString.h.

◆ BOM_ENCODE_UTF16_BE

const u8 irr::core::unicode::BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF }

Definition at line 138 of file irrUString.h.

◆ BOM_ENCODE_UTF16_LE

const u8 irr::core::unicode::BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE }

Definition at line 139 of file irrUString.h.

◆ BOM_ENCODE_UTF16_LEN

const u8 irr::core::unicode::BOM_ENCODE_UTF16_LEN = 2

Definition at line 145 of file irrUString.h.

◆ BOM_ENCODE_UTF32_BE

const u8 irr::core::unicode::BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF }

Definition at line 140 of file irrUString.h.

◆ BOM_ENCODE_UTF32_LE

const u8 irr::core::unicode::BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 }

Definition at line 141 of file irrUString.h.

◆ BOM_ENCODE_UTF32_LEN

const u8 irr::core::unicode::BOM_ENCODE_UTF32_LEN = 4

Definition at line 146 of file irrUString.h.

◆ BOM_ENCODE_UTF8

const u8 irr::core::unicode::BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF }

Unicode byte order marks for file operations.

Definition at line 137 of file irrUString.h.

◆ BOM_ENCODE_UTF8_LEN

const u8 irr::core::unicode::BOM_ENCODE_UTF8_LEN = 3

The size in bytes of the Unicode byte order marks for file operations.

Definition at line 144 of file irrUString.h.

◆ BOM_UTF16_LEN

const u8 irr::core::unicode::BOM_UTF16_LEN = 1

Definition at line 133 of file irrUString.h.

◆ BOM_UTF32_LEN

const u8 irr::core::unicode::BOM_UTF32_LEN = 1

Definition at line 134 of file irrUString.h.

◆ BOM_UTF8_LEN

const u8 irr::core::unicode::BOM_UTF8_LEN = 3

The size of the Unicode byte order mark in terms of the Unicode character size.

Definition at line 132 of file irrUString.h.

◆ UTF_REPLACEMENT_CHARACTER

const irr::u16 irr::core::unicode::UTF_REPLACEMENT_CHARACTER = 0xFFFD

The Unicode replacement character. Used to replace invalid characters.

Definition at line 97 of file irrUString.h.

Enumerations

Functions

Variables

Enumeration Type Documentation

◆ EUTF_ENCODE

◆ EUTF_ENDIAN

Function Documentation

◆ determineUnicodeBOM()

◆ getUnicodeBOM()

◆ swapEndian16()

◆ swapEndian32()

◆ toUTF32()

Variable Documentation

◆ BOM

◆ BOM_ENCODE_UTF16_BE

◆ BOM_ENCODE_UTF16_LE

◆ BOM_ENCODE_UTF16_LEN

◆ BOM_ENCODE_UTF32_BE

◆ BOM_ENCODE_UTF32_LE

◆ BOM_ENCODE_UTF32_LEN

◆ BOM_ENCODE_UTF8

◆ BOM_ENCODE_UTF8_LEN

◆ BOM_UTF16_LEN

◆ BOM_UTF32_LEN

◆ BOM_UTF8_LEN

◆ UTF_REPLACEMENT_CHARACTER