//=============================================================================
// drt/sys/schar4.cpp
//	Unicode character classes.
//
//	These classes provide a representation of Unicode characters.
//
// History
//	0.01, 1999-08-18, David R Tribble.
//	First cut.
//
// Copyright (c) 1999, by David R. Tribble, all rights reserved.
// See "drt/sys/copyr.txt" for more information.
//-----------------------------------------------------------------------------


// Identification

// Identification string naming this source file and its revision ("0.01").
// The "@(#)" prefix matches the marker searched for by the SCCS what(1)
// utility; presumably the string exists so that the revision can be recovered
// from a compiled object file. It is not referenced by the code in this file.
static const char	id[] =
    "@(#)drt/sys/schar4.cpp 0.01";


// System includes

#include <assert.h>
#define drt_std_assert_h	1

#include <ctype.h>
#define drt_std_ctype_h		1


// Special includes

#include "sdefs.hpp"


// Local includes

#include "sdebug.hpp"

#include "schar.hpp"


// Local wrappers

#include "slib1.hpp"

drt_namespace_begin


//-----------------------------------------------------------------------------
// Class member functions
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// DrtChar::toUTF_8()
//	Write this character into string 's' as one or more octets.
//
// Notes
//	The binary representation of the character's integer value is simply
//	spread across the octets and the number of high bits set in the leading
//	byte announces the number of bytes in the multibyte sequence:
//
//	Bytes | Bits | Representation
//	------+------+----------------
//	    1 |    7 | 0vvvvvvv
//	    2 |   11 | 110vvvvv 10vvvvvv
//	    3 |   16 | 1110vvvv 10vvvvvv 10vvvvvv
//	    4 |   21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
//	    5 |   26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
//	    6 |   31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv
//
//	Up to six octets are shown for full 31-bit Unicode character codes, but
//	since we're only dealing with 16-bit codes, 's' will be filled with no
//	more than three octets.
//
// Returns
//	The number of octets written into string 's', which will be in the
//	range [1,3], or -1 on error.
//
// Caveats
//	Since this Unicode character contains only 16 bits, there will never be
//	more than three octets placed into string 's'.
//-----------------------------------------------------------------------------

int DrtChar::toUTF_8(char *s) const
{
#if DrtChar_VS/100 != 1
 #error DrtChar_VS has changed
#endif

    //DrtTrace	dbg(s_grp, "toUTF_8", this);

    // Check this object's state before relying on it
    validate();

    // A destination buffer is mandatory
    if (s == null)
        return (-1);

    // Codes below 0x0080 are stored unchanged in a single octet (0vvvvvvv)
    if (m_ch < 0x0080)
    {
        s[0] = m_ch;
        return (1);
    }

    // Codes below 0x0800 take a 110vvvvv lead octet plus one 10vvvvvv
    // trailing octet; the low six bits go into the trailing octet
    if (m_ch < 0x0800)
    {
        s[1] = 0x80 | (m_ch & 0x3F);
        s[0] = 0xC0 | (m_ch >> 6);
        return (2);
    }

    // All remaining codes take a 1110vvvv lead octet plus two 10vvvvvv
    // trailing octets, six bits per trailing octet, low bits last
    s[2] = 0x80 | (m_ch & 0x3F);
    s[1] = 0x80 | ((m_ch >> 6) & 0x3F);
    s[0] = 0xE0 | (m_ch >> 12);
    return (3);
}


//-----------------------------------------------------------------------------
// DrtChar::fromUTF_8()
//	Read one or more octets from string 's' to compose this Unicode
//	character code.
//
// Notes
//	(See DrtChar::toUTF_8().)
//
//	The lead octet selects the sequence length:
//
//	Lead octet | Octets | Value bits taken from the lead octet
//	-----------+--------+--------------------------------------
//	 0x00-0x7F |      1 | 7  (0vvvvvvv)
//	 0xC0-0xDF |      2 | 5  (110vvvvv)
//	 0xE0-0xEF |      3 | 4  (1110vvvv)
//
//	Every following octet must have the form 10vvvvvv and contributes six
//	value bits.
//
// Returns
//	The number of octets read from string 's', which will be in the range
//	[1,3], or -1 on error.  Errors are: 's' is null; the lead octet is a
//	stray continuation octet (0x80-0xBF); the lead octet announces a
//	sequence longer than three octets (0xF0-0xFF); or a required
//	continuation octet does not have the form 10vvvvvv.  On error this
//	character is left unmodified.
//
// Caveats
//	UTF-8 encodings of Unicode characters with more than 16 bits, i.e.,
//	more than three octets, are not decoded; they are rejected with -1.
//
//	Overlong encodings (a value stored in more octets than necessary) are
//	not rejected.
//-----------------------------------------------------------------------------

int DrtChar::fromUTF_8(const char *s)
{
#if DrtChar_VS/100 != 1
 #error DrtChar_VS has changed
#endif

    //DrtTrace	dbg(s_grp, "fromUTF_8", this);

    // Validate this object
    validate();

    // Check args
    if (s == null)
        return (-1);

    // Compose this character from one or more octets; the octets are viewed
    // as unsigned so that values >= 0x80 compare and mask correctly
    const unsigned char *	c = reinterpret_cast(const unsigned char *, s);

    if (c[0] < 0x80)
    {
        // Single octet: 0vvvvvvv
        m_ch = c[0];
        return (1);
    }
    else if (c[0] < 0xC0)
    {
        // 10vvvvvv is a continuation octet and cannot start a sequence
        return (-1);
    }
    else if (c[0] < 0xE0)
    {
        // Two octets: 110vvvvv 10vvvvvv
        if ((c[1] & 0xC0) != 0x80)
            return (-1);

        // The lead octet's five bits sit above the six bits of the
        // continuation octet, hence the shift by 6
        m_ch =  (c[0] & 0x1F) << 6;
        m_ch |= (c[1] & 0x3F);
        return (2);
    }
    else if (c[0] < 0xF0)
    {
        // Three octets: 1110vvvv 10vvvvvv 10vvvvvv
        // c[2] is only examined once c[1] is known to be a continuation
        // octet, so a string that ends early is never read past its
        // terminating NUL
        if ((c[1] & 0xC0) != 0x80  ||  (c[2] & 0xC0) != 0x80)
            return (-1);

        m_ch =  (c[0] & 0x0F) << 12;
        m_ch |= (c[1] & 0x3F) << 6;
        m_ch |= (c[2] & 0x3F);
        return (3);
    }
    else
    {
        // Lead octets 0xF0 and above announce four or more octets, which
        // cannot be represented in a 16-bit character code
        return (-1);
    }
}


drt_namespace_end

// End schar4.cpp