/*
*******************************************************************************
*
* Copyright (C) 1999-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf8.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 8-bit Unicode handling macros
*
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
* utf8.h is included by utf.h after unicode/umachine.h
* and some common definitions.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (http://icu-project.org/userguide/strings.html).
*
* Usage:
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF8_H__
#define __UTF8_H__
/* utf.h must be included first. */
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* internal definitions ----------------------------------------------------- */
/**
* \var utf8_countTrailBytes
* Internal array with numbers of trail bytes for any given byte used in
* lead byte position.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable,
* and should not be hidden when other internal functions are hidden (otherwise
* public macros would fail to compile).
* @internal
*/
#ifdef U_UTF8_IMPL
U_EXPORT const uint8_t
#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
U_CFUNC const uint8_t
#else
U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/
#endif
utf8_countTrailBytes[256];
/**
* Count the trail bytes for a UTF-8 lead byte.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
* @internal
*/
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(10x10ffff ? 0 : \
((uint32_t)(c)=0x80) { \
uint8_t __t1, __t2; \
if( /* handle U+1000..U+CFFF inline */ \
(0xe0=0xc2) && \
((i)>6)|0xc0); \
} else { \
if((uint32_t)(c)>12)|0xe0); \
} else { \
(s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
(s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
} \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
} \
}
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a non-ASCII code point is written, checks for sufficient space in the string.
* If the code point is not valid or trail bytes do not fit,
* then isError is set to TRUE.
*
* @param s const uint8_t * string buffer
* @param i string offset, must be i