/*============================================================================= * drt/sys/kchar.h * DRT Unicode character classes. * * These classes provide a portable representation of Unicode characters. * * Notes * The 'DrtChar' type is used to represent Unicode characters rather than * the 'wchar_t' because the latter varies across implementations and can * be as small as 8 bits or as large as 64 bits. * Using this class type provides a better interface with better control * and more functionality. * * History * 1.00, 1998-08-19, David R Tribble. * First cut, converted from C++ to C. * * Copyright ©1999, by David R. Tribble, all rights reserved. * See "drt/sys/copyr.txt" for more information. *----------------------------------------------------------------------------*/ #ifndef drt_sys_kchar_h #define drt_sys_kchar_h 100 #ifdef __cplusplus extern "C" { #endif /* Identification */ #ifndef NO_H_IDENT static const char drt_sys_kchar_h_id[] = "@(#)drt/sys/kchar.h 1.00"; #endif /* Special includes */ #ifndef drt_sys_kdefs_h #include "kdefs.h" #endif /* Local includes */ #ifndef drt_sys_kdebug_h #include "kdebug.h" #endif /* Local wrappers */ #include "klib1.h" /*----------------------------------------------------------------------------- * Type drt_char_t * Unicode character code datatype (16 bit). * * History * 100, 1998-08-19, David R Tribble. * First cut. *----------------------------------------------------------------------------*/ #define DRT_CHAR_VS 100 /* Type version */ typedef drt_uint16_t drt_char_t; /* Unicode character type */ /* Public constants */ extern const int drt_char_vs; /* Library version */ extern const unsigned char drt_atoe_01047[0x100]; /* ASCII to EBCDIC table */ extern const unsigned char drt_etoa_01047[0x100]; /* EBCDIC to ASCII table */ #define drt_char_code_VS 100 /* Enum version */ enum /*anonymous*/ { C_NUL = 0x0000, /* Null */ C_HT = 0x0009, /* Tab */ C_LF = 0x000A, /* Linefeed */ C_VT = 0x000B, /* Vertical tab */ C_FF = 0x000C, /* Formfeed */ C_CR = 0x000D, /* Carriage return */ C_SP = 0x0020, /* Space */ C_DEL = 0x007F, /* Delete */ C_NBSP = 0x00A0, /* Nonbreaking space */ C_EOLN = 0x2028, /* End-of-line */ C_SURR_H = 0xD800, /* Surrogate, high half */ C_SURR_L = 0xDC00, /* Surrogate, low half */ C_EOF = 0xFFFF, /* End-of-file */ C_NAC = 0xFFFF, /* Not-a-char */ C_BOM = 0xFEFF, /* Byte-order-mark */ C_BOM_R = 0xFFFE, /* Byte-order-mark, swapped */ C_MAX = 0xFFFF /* Highest char code */ }; /* Public variables */ extern struct DrtTraceGrp drt_char_grp; /* Debugging trace group */ /* Public functions */ extern bool drt_char_is_alnum(drt_char_t c); /* Is alphanumeric */ extern bool drt_char_is_alpha(drt_char_t c); /* Is alphabetic */ extern bool drt_char_is_control(drt_char_t c); /* Is control */ extern bool drt_char_is_digit(drt_char_t c); /* Is a digit */ extern bool drt_char_is_eoln(drt_char_t c); /* Is an end-of-line char */ extern bool drt_char_is_hex(drt_char_t c); /* Is a hexdecimal digit */ extern bool drt_char_is_lower(drt_char_t c); /* Is lowercase alphabetic */ extern bool drt_char_is_null(drt_char_t c); /* Is null char */ extern bool drt_char_is_octal(drt_char_t c); /* Is an octal digit */ extern bool drt_char_is_print(drt_char_t c); /* Is printable */ extern bool drt_char_is_print_8(drt_char_t c); /* Is printable as 8-bit ASCII */ extern bool drt_char_is_punct(drt_char_t c); /* Is punctuation */ extern bool drt_char_is_space(drt_char_t c); /* Is space */ extern bool drt_char_is_surr(drt_char_t c); /* Is a surrogate half-char */ extern bool drt_char_is_upper(drt_char_t c); /* Is uppercase alphabetic */ extern bool drt_char_is_white(drt_char_t c); /* Is whitespace */ extern bool drt_char_is_ASCII_7(drt_char_t c); /* Is 7-bit ASCII/ISO-646 */ extern bool drt_char_is_ISO_8859_1(drt_char_t c); /* Is 8-bit ISO-8859-1/Latin-1 */ extern int drt_char_to_UTF_7(char *s, drt_char_t c); /* Convert to UTF-7 seq */ extern int drt_char_to_UTF_8(char *s, drt_char_t c); /* Convert to UTF-8 seq */ extern int drt_char_to_ISO_8859_1(char *s, drt_char_t c); /* Convert to ISO-8859-1 seq */ extern int drt_char_from_UTF_7(drt_char_t *c, const char *s); /* Convert from UTF-7 seq */ extern int drt_char_from_UTF_8(drt_char_t *c, const char *s); /* Convert from UTF-8 seq */ extern int drt_char_from_ISO_8859_1(drt_char_t *c, const char *s); /* Convert from ISO-8859-1 seq */ extern int drt_char_copy(drt_char_t d[], const drt_char_t s[], int len); /* Copy buffer contents */ /* Wrapper end */ #include "klib2.h" #ifdef __cplusplus } #endif #endif /* drt_sys_kchar_h */ /* End drt/sys/kchar.h */