//============================================================================= // drt/sys/schar.hpp // DRT Unicode character classes. // // These classes provide a portable representation of Unicode characters. // // Notes // The 'DrtChar' type is used to represent Unicode characters rather than // the 'wchar_t' because the latter varies across implementations and can // be as small as 8 bits or as large as 64 bits. // Using this class type provides a better interface with better control // and more functionality. // // History // 0.01, 1998-04-04, David R Tribble. // First cut. // // 0.01, 1998-04-19, David R Tribble. // Added class ScmString. // // 0.02, 1998-04-24, David R Tribble. // Renamed from "scchar.h" to "jchar.h". // // 0.03, 1998-05-30, David R Tribble. // Changed filename prefix from "j" to "k". // // 0.04, 1999-02-21, David R Tribble. // Moved from scm/ to drt/sys/. // // Copyright ©1998-1999, by David R. Tribble, all rights reserved. // See "drt/sys/copyr.txt" for more information. //----------------------------------------------------------------------------- #ifndef drt_sys_schar_hpp #define drt_sys_schar_hpp 004 // Identification #ifndef NO_H_IDENT static const char drt_sys_schar_hpp_id[] = "@(#)drt/sys/schar.hpp 0.04"; #endif // Special includes #ifndef drt_sys_sdefs_hpp #include "sdefs.hpp" #endif // Local includes #ifndef drt_sys_sdebug_hpp #include "sdebug.hpp" #endif // Local wrappers #include "slib1.hpp" drt_namespace_begin //----------------------------------------------------------------------------- // Class DrtChar // Contains a Unicode character code. // // Notes // For efficiency, this is implemented as a POD class, and is exactly the // size of a 16-bit Unicode character. // // History // 100, 1998-04-04, David R Tribble. // First cut. // // 101, 1998-04-19, David R Tribble. // Added .toXXX() and .fromXXX() funcs. // // 102, 1998-04-24, David R Tribble. // Replaced constants 's_null' et all with enums 'C_NULL' et al. //----------------------------------------------------------------------------- #define DrtChar_VS 102 // Class version class DRTEXPORT DrtChar { public: // Shared constants static const int VS; // Class version static const unsigned int MAGIC; // Class magic number static const unsigned char s_atoe[0x100]; // ASCII to EBCDIC table static const unsigned char s_etoa[0x100]; // EBCDIC to ASCII table public: // Constants #define DrtChar_Code_VS 100 // Enum version enum Code // Character codes { C_NUL = 0x0000, // Null C_HT = 0x0009, // Tab C_LF = 0x000A, // Linefeed C_VT = 0x000B, // Vertical tab C_FF = 0x000C, // Formfeed C_CR = 0x000D, // Carriage return C_SP = 0x0020, // Space C_DEL = 0x007F, // Delete C_NBSP = 0x00A0, // Nonbreaking space C_EOLN = 0x2028, // End-of-line C_SURR_H = 0xD800, // Surrogate, high half C_SURR_L = 0xDC00, // Surrogate, low half C_EOF = 0xFFFF, // End-of-file C_NAC = 0xFFFF, // Not-a-char C_BOM = 0xFEFF, // Byte-order-mark C_BOM_R = 0xFFFE, // Byte-order-mark, swapped C_MAX = 0xFFFF // Highest char code }; public: // Shared variables static DrtTraceGroup s_grp; // Class debugging group public: // Variables unsigned short m_ch; // The character code public: // Shared functions static bool isAlnum(const DrtChar &c); // Is alphanumeric static bool isAlpha(const DrtChar &c); // Is alphabetic static bool isDigit(const DrtChar &c); // Is a digit static bool isControl(const DrtChar &c); // Is control static bool isEoln(const DrtChar &c); // Is an end-of-line char static bool isHex(const DrtChar &c); // Is a hexdecimal digit static bool isLower(const DrtChar &c); // Is lowercase alphabetic static bool isNull(const DrtChar &c); // Is null char static bool isOctal(const DrtChar &c); // Is an octal digit static bool isPrint(const DrtChar &c); // Is printable static bool isPrint_8(const DrtChar &c); // Is printable as 8-bit ASCII static bool isPunct(const DrtChar &c); // Is punctuation static bool isSpace(const DrtChar &c); // Is space static bool isSurr(const DrtChar &c); // Is a surrogate half-char static bool isUpper(const DrtChar &c); // Is uppercase alphabetic static bool isWhite(const DrtChar &c); // Is whitespace static bool isASCII_7(const DrtChar &c); // Is 7-bit ASCII/ISO-646 static bool isISO_8859_1(const DrtChar &c); // Is 8-bit ISO-8859-1/Latin-1 static int copy(DrtChar d[], const DrtChar s[], int len); // Copy buffer contents static int copy(DrtChar d[], const char s[], int len); // Copy buffer contents public: // Functions void validate() const; // Validate this object void dump() const; // Dump contents to debugs inline void set(int ch); // Set the char code inline int ch() const; // Get the char code bool isNull() const; // Is null char bool isEoln() const; // Is end-of-line char bool isSpace() const; // Is space char bool isWhite() const; // Is whitespace char bool isControl() const; // Is control char bool isPrint() const; // Is printable bool isAlnum() const; // Is alphanumeric bool isDigit() const; // Is a digit bool isPrint_8() const; // Is printable as 8-bit ASCII bool isASCII_7() const; // Is 7-bit ASCII/ISO-646 bool isISO_8859_1() const; // Is 8-bit ISO-8859-1/Latin-1 int toUTF_7(char *s) const; // Convert to UTF-7 seq int toUTF_8(char *s) const; // Convert to UTF-8 seq int toISO_8859_1(char *s) const; // Convert to ISO-8859-1 seq int fromUTF_7(const char *s); // Convert from UTF-7 seq int fromUTF_8(const char *s); // Convert from UTF-8 seq int fromISO_8859_1(const char *s); // Convert from ISO-8859-1 seq public: // Operators inline const DrtChar & operator =(const DrtChar &r); // Assignment inline const DrtChar & operator =(int r); // Assignment inline bool operator ==(const DrtChar &r) const; // Compare to another char inline bool operator !=(const DrtChar &r) const; // Compare to another char inline bool operator <(const DrtChar &r) const; // Compare to another char inline bool operator >(const DrtChar &r) const; // Compare to another char inline bool operator <=(const DrtChar &r) const; // Compare to another char inline bool operator >=(const DrtChar &r) const; // Compare to another char inline bool operator ==(int r) const; // Compare to another char inline bool operator !=(int r) const; // Compare to another char inline bool operator <(int r) const; // Compare to another char inline bool operator >(int r) const; // Compare to another char inline bool operator <=(int r) const; // Compare to another char inline bool operator >=(int r) const; // Compare to another char private: // Functions // Constructors and destructors not provided // No destructor // No default constructor // No copy constructor // /*DrtChar*/ DrtChar(int r); // Constructor }; //----------------------------------------------------------------------------- // Inline functions //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // DrtChar::set() // Set the character code for this character object. //----------------------------------------------------------------------------- inline void DrtChar::set(int ch) { #if DrtChar_VS/100 != 1 #error Class DrtChar has changed #endif // Assign this character code m_ch = unsigned short(ch); } //----------------------------------------------------------------------------- // DrtChar::ch() // Get the character code for this character object. // // Returns // A Unicode character code in the range [0x0000,0xFFFE], or 0xFFFF on // error. //----------------------------------------------------------------------------- inline int DrtChar::ch() const { #if DrtChar_VS/100 != 1 #error Class DrtChar has changed #endif // Retrieve this character code return (m_ch); } //----------------------------------------------------------------------------- // DrtChar::operator =() // Assignment operator. //----------------------------------------------------------------------------- inline const DrtChar & DrtChar::operator =(const DrtChar &r) { #if DrtChar_VS != 102 #error Class DrtChar has changed #endif // Assign contents of 'r' to this object m_ch = r.m_ch; return (*this); } //----------------------------------------------------------------------------- // DrtChar::operator =() // Assignment operator. //----------------------------------------------------------------------------- inline const DrtChar & DrtChar::operator =(int r) { #if DrtChar_VS != 102 #error Class DrtChar has changed #endif // Assign character code 'r' to this object m_ch = r; return (*this); } //----------------------------------------------------------------------------- // DrtChar::operator ==() // Equality operator. //----------------------------------------------------------------------------- inline bool DrtChar::operator ==(int r) const { #if DrtChar_VS != 102 #error Class DrtChar has changed #endif // Compare character code 'r' to this object return (m_ch == r); } //----------------------------------------------------------------------------- // DrtChar::operator !=() // Inequality operator. //----------------------------------------------------------------------------- inline bool DrtChar::operator !=(int r) const { #if DrtChar_VS != 102 #error Class DrtChar has changed #endif // Compare character code 'r' to this object return (m_ch != r); } //----------------------------------------------------------------------------- // Class DrtString // Contains a string of Unicode character codes. // // History // 100, 1998-04-19, David R Tribble. // First cut. //----------------------------------------------------------------------------- #define DrtString_VS 101 // Class version class DRTEXPORT DrtString { public: // Shared constants static const int VS; // Class version static const unsigned int MAGIC; // Class magic number public: // Functions /*void*/ ~DrtString(); // Destructor /*DrtString*/ DrtString(); // Default constructor /*DrtString*/ DrtString(const char *s); // Constructor (cast operator) void validate() const; // Validate this object void dump() const; // Dump contents to debugs int size() const; // Get length bool isEmpty() const; // Is zero length bool take(DrtChar *s, int len = -1); // Steal string contents DrtChar * yield(); // Relinquish string contents int toUTF_7(char *s, int len); // Convert to UTF-7 seq int toUTF_8(char *s, int len); // Convert to UTF-8 seq int toISO_8859_1(char *s, int len); // Convert to ISO-8859-1 seq int fromUTF_7(const char *s, int len); // Convert from UTF-7 seq int fromUTF_8(const char *s, int len); // Convert from UTF-8 seq int fromISO_8859_1(const char *s, int len); // Convert from ISO-8859-1 seq public: // Operators const DrtString & operator =(const DrtString &r) const; // Assignment bool operator ==(const DrtString &r) const; // Compare to another string bool operator !=(const DrtString &r) const; // Compare to another string private: // Shared variables static DrtTraceGroup s_grp; // Class debugging group private: // Variables DrtChar * m_str; // The string of characters int m_len; // Length, number of chars private: // Functions // Constructors and destructors not provided /*DrtString*/ DrtString(const DrtString &r); // Copy constructor }; #include "slib2.hpp" drt_namespace_end #endif // drt_sys_schar_hpp // End drt/sys/schar.hpp