kolibrios/contrib/sdk/sources/newlib/libc/stdlib/wctomb_r.c

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <locale.h>
#include "mbctype.h"
#include "local.h"

/* Pointer to the wide-character to multibyte converter that _wctomb_r
   calls through.  It is initialised to __utf8_wctomb when __CYGWIN__ is
   defined and to __ascii_wctomb otherwise.  The arguments are, in order:
   reentrancy structure, output buffer, wide character, charset name and
   conversion state.  */
int (*__wctomb) (struct _reent *, char *, wchar_t, const char *charset,
		 mbstate_t *)
#ifdef __CYGWIN__
   /* Cygwin starts up in UTF-8 mode. */
    = __utf8_wctomb;
#else
    = __ascii_wctomb;
#endif

/* Reentrant wide-character to multibyte conversion.

   Looks up the charset name with __locale_charset () and forwards all
   arguments to the converter that __wctomb currently points to.  The
   converter's return value is passed back unchanged.  */
int
_DEFUN (_wctomb_r, (r, s, wchar, state),
        struct _reent *r     _AND
        char          *s     _AND
        wchar_t        _wchar _AND
        mbstate_t     *state)
{
  const char *charset = __locale_charset ();

  return (*__wctomb) (r, s, _wchar, charset, state);
}

/* Single-byte converter: stores _wchar in *s as one char.

   Returns 0 when s is NULL and 1 after storing the byte.  Values at or
   above the limit (0x80 when __CYGWIN__ is defined, 0x100 otherwise) are
   rejected: r->_errno is set to EILSEQ and -1 is returned.  charset and
   state are not used.  */
int
_DEFUN (__ascii_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  /* First value that cannot be represented by this converter.  */
#ifdef __CYGWIN__
  const size_t limit = 0x80;
#else
  const size_t limit = 0x100;
#endif
  /* Work on a wint_t copy.  This avoids compiler warnings about
     comparisons that are always false due to limited range when
     sizeof(wchar_t) is 2 but sizeof(wint_t) is 4, as is the case on
     cygwin.  */
  wint_t wc = _wchar;

  if (s == NULL)
    return 0;

  if ((size_t) wc < limit)
    {
      *s = (char) wc;
      return 1;
    }

  r->_errno = EILSEQ;
  return -1;
}

#ifdef _MB_CAPABLE
/* Some conversions use the __count field of mbstate_t as a place to store
   a state value; __state is an alias for that field when it is used this
   way.  */
#define __state __count

/* Convert _wchar to UTF-8 and store the resulting bytes at s.

   Returns 0 when s is NULL.  Otherwise returns the byte count reported
   for this call, or -1 with r->_errno set to EILSEQ when the value falls
   in none of the ranges handled below (e.g. above 0x10ffff).  charset is
   not used.

   When sizeof (wchar_t) == 2, UTF-16 surrogates are handled using *state:

   - A high surrogate (0xd800..0xdbff) is remembered in
     state->__value.__wchb[0..1] with state->__count set to -4.  The lead
     byte of the eventual 4-byte sequence is stored at *s, but it is not
     included in the returned count.
   - A low surrogate (0xdc00..0xdfff) arriving while a high surrogate is
     pending completes the pair: all four bytes are written and 4 is
     returned.
   - Any other character arriving while a high surrogate is pending first
     flushes that surrogate as a 3-byte CESU-8 sequence; the character
     itself is then converted and the 3 extra bytes are added to the
     returned count.
   - A low surrogate with nothing pending is written as a 3-byte CESU-8
     sequence.  */
int
_DEFUN (__utf8_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  wint_t wchar = _wchar;
  int ret = 0;	/* bytes already written for a flushed lone surrogate */

  if (s == NULL)
    return 0; /* UTF-8 encoding is not state-dependent */

  if (sizeof (wchar_t) == 2 && state->__count == -4
      && (wchar < 0xdc00 || wchar > 0xdfff))
    {
      /* There's a leftover lone high surrogate.  Write out the CESU-8 value
	 of the surrogate and proceed to convert the given character.  Note
	 to return extra 3 bytes.

	 The state holds ((high & 0x3ff) << 10) + 0x10000 (see the "first
	 half" branch below), so the surrogate is recovered by undoing
	 exactly that: subtract 0x10000, shift right by 10 and put the
	 0xd800 tag back.  */
      wint_t tmp;
      tmp = ((((state->__value.__wchb[0] << 16)
	       | (state->__value.__wchb[1] << 8)) - 0x10000) >> 10)
	    | 0xd800;
      *s++ = 0xe0 | ((tmp & 0xf000) >> 12);
      *s++ = 0x80 | ((tmp &  0xfc0) >> 6);
      *s++ = 0x80 |  (tmp &   0x3f);
      state->__count = 0;
      ret = 3;
    }
  if (wchar <= 0x7f)
    {
      /* One byte. */
      *s = wchar;
      return ret + 1;
    }
  if (wchar >= 0x80 && wchar <= 0x7ff)
    {
      /* Two bytes. */
      *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
      *s   = 0x80 |  (wchar &  0x3f);
      return ret + 2;
    }
  if (wchar >= 0x800 && wchar <= 0xffff)
    {
      /* No UTF-16 surrogate handling in UCS-4 */
      if (sizeof (wchar_t) == 2 && wchar >= 0xd800 && wchar <= 0xdfff)
	{
	  wint_t tmp;
	  if (wchar <= 0xdbff)
	    {
	      /* First half of a surrogate pair.  Store the state and
	         return ret + 0.  Only bits 8..23 of tmp are saved; the
	         low 8 bits are always zero because of the shift by 10. */
	      tmp = ((wchar & 0x3ff) << 10) + 0x10000;
	      state->__value.__wchb[0] = (tmp >> 16) & 0xff;
	      state->__value.__wchb[1] = (tmp >> 8) & 0xff;
	      state->__count = -4;
	      *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
	      return ret;
	    }
	  if (state->__count == -4)
	    {
	      /* Second half of a surrogate pair.  Reconstruct the full
		 Unicode value and return the trailing three bytes of the
		 UTF-8 character. */
	      tmp = (state->__value.__wchb[0] << 16)
		    | (state->__value.__wchb[1] << 8)
		    | (wchar & 0x3ff);
	      state->__count = 0;
	      *s++ = 0xf0 | ((tmp & 0x1c0000) >> 18);
	      *s++ = 0x80 | ((tmp &  0x3f000) >> 12);
	      *s++ = 0x80 | ((tmp &    0xfc0) >> 6);
	      *s   = 0x80 |  (tmp &     0x3f);
	      return 4;
	    }
	  /* Otherwise translate into CESU-8 value. */
	}
      /* Three bytes. */
      *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
      *s++ = 0x80 | ((wchar &  0xfc0) >> 6);
      *s   = 0x80 |  (wchar &   0x3f);
      return ret + 3;
    }
  if (wchar >= 0x10000 && wchar <= 0x10ffff)
    {
      /* Four bytes. */
      *s++ = 0xf0 | ((wchar & 0x1c0000) >> 18);
      *s++ = 0x80 | ((wchar &  0x3f000) >> 12);
      *s++ = 0x80 | ((wchar &    0xfc0) >> 6);
      *s   = 0x80 |  (wchar &     0x3f);
      return 4;
    }

  r->_errno = EILSEQ;
  return -1;
}

/* Cygwin defines its own doublebyte charset conversion functions
   because the underlying OS requires wchar_t == UTF-16. */
#ifndef __CYGWIN__
/* Shift-JIS converter.

   The wide character is split into a lead byte (bits 8..15) and a trail
   byte (bits 0..7); any higher bits do not take part in that split.
   Returns 0 when s is NULL.  With a zero lead byte one byte is stored and
   1 is returned.  Otherwise both bytes must pass _issjis1/_issjis2; they
   are then stored and 2 is returned, else r->_errno is set to EILSEQ and
   -1 is returned.  charset and state are not used.  */
int
_DEFUN (__sjis_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  wint_t wc = _wchar;
  unsigned char lead = (unsigned char) (wc >> 8);
  unsigned char trail = (unsigned char) wc;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (lead == 0x00)
    {
      /* Single-byte character. */
      *s = (char) wc;
      return 1;
    }

  /* Lead byte is non-zero: validate the double-byte character. */
  if (!(_issjis1 (lead) && _issjis2 (trail)))
    {
      r->_errno = EILSEQ;
      return -1;
    }

  s[0] = (char) lead;
  s[1] = (char) trail;
  return 2;
}

/* EUC-JP converter.

   The wide character is split into a lead byte (bits 8..15) and a trail
   byte (bits 0..7).  Returns 0 when s is NULL.  Results:
     - lead byte zero: one byte stored, returns 1;
     - lead passes _iseucjp1 and trail passes _iseucjp2: both bytes
       stored, returns 2;
     - lead passes _iseucjp2 and (trail | 0x80) passes _iseucjp2: 0x8f,
       lead and (trail | 0x80) stored, returns 3;
     - anything else: r->_errno = EILSEQ, returns -1.
   charset and state are not used.  */
int
_DEFUN (__eucjp_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  wint_t wc = _wchar;
  unsigned char lead = (unsigned char) (wc >> 8);
  unsigned char trail = (unsigned char) wc;

  if (s == NULL)
    return 0;  /* not state-dependent */

  if (lead == 0x00)
    {
      /* Single-byte character. */
      *s = (char) wc;
      return 1;
    }

  /* Lead byte is non-zero: validate the multi-byte character. */
  if (_iseucjp1 (lead) && _iseucjp2 (trail))
    {
      s[0] = (char) lead;
      s[1] = (char) trail;
      return 2;
    }

  if (_iseucjp2 (lead) && _iseucjp2 (trail | 0x80))
    {
      /* Three-byte form introduced by 0x8f. */
      s[0] = (char) 0x8f;
      s[1] = (char) lead;
      s[2] = (char) (trail | 0x80);
      return 3;
    }

  r->_errno = EILSEQ;
  return -1;
}

/* JIS converter; the encoding is state-dependent.

   state->__state is 0 in ASCII mode and 1 in JIS mode.  The wide
   character is split into a lead byte (bits 8..15) and a trail byte
   (bits 0..7).  Returns 1 when s is NULL.

   With a zero lead byte the trail byte is stored as a single byte,
   preceded by ESC ( B (and a switch to state 0) if the state was
   non-zero.  With a non-zero lead byte both bytes must pass _isjis; they
   are stored preceded by ESC $ B (and a switch to state 1) if the state
   was 0.  The return value counts the escape bytes as well.  Invalid
   double-byte values set r->_errno to EILSEQ and return -1 without
   touching the state.  charset is not used.  */
int
_DEFUN (__jis_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  wint_t wc = _wchar;
  unsigned char lead = (unsigned char) (wc >> 8);
  unsigned char trail = (unsigned char) wc;
  int escape_len = 0;

  if (s == NULL)
    return 1;  /* state-dependent */

  if (lead == 0x00)
    {
      /* Single-byte character. */
      if (state->__state != 0)
	{
	  /* must switch from JIS to ASCII state */
	  state->__state = 0;
	  *s++ = ESC_CHAR;
	  *s++ = '(';
	  *s++ = 'B';
	  escape_len = 3;
	}
      *s = (char) trail;
      return escape_len + 1;
    }

  /* Lead byte is non-zero: validate the double-byte character. */
  if (!(_isjis (lead) && _isjis (trail)))
    {
      r->_errno = EILSEQ;
      return -1;
    }

  if (state->__state == 0)
    {
      /* must switch from ASCII to JIS state */
      state->__state = 1;
      *s++ = ESC_CHAR;
      *s++ = '$';
      *s++ = 'B';
      escape_len = 3;
    }
  *s++ = (char) lead;
  *s = (char) trail;
  return escape_len + 2;
}
#endif /* !__CYGWIN__ */

#ifdef _MB_EXTENDED_CHARSETS_ISO
/* ISO-8859-x converter.

   Returns 0 when s is NULL, 1 after storing one byte at *s, and -1 with
   r->_errno set to EILSEQ when the wide character has no representation.

   Values below 0xa0 are stored directly.  For values from 0xa0 up, the
   charset is looked up with __iso_8859_index (charset + 9); if that
   yields a table index, row iso_idx of __iso_8859_conv is searched for
   the wide character and the matching position (plus 0xa0) is the output
   byte.  If no table index is returned, values below 0x100 are stored
   directly and larger ones are rejected.

   NOTE(review): charset + 9 presumably skips an "ISO-8859-" prefix;
   confirm against the callers that install this converter.  state is not
   used.  */
int
_DEFUN (__iso_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  wint_t wchar = _wchar;

  if (s == NULL)
    return 0;

  /* wchars <= 0x9f translate to all ISO charsets directly. */
  if (wchar >= 0xa0)
    {
      int iso_idx = __iso_8859_index (charset + 9);
      if (iso_idx >= 0)
	{
	  unsigned char mb;

	  /* s is known to be non-NULL here, so no second check is
	     needed before writing through it.  */
	  for (mb = 0; mb < 0x60; ++mb)
	    if (__iso_8859_conv[iso_idx][mb] == wchar)
	      {
		*s = (char) (mb + 0xa0);
		return 1;
	      }
	  r->_errno = EILSEQ;
	  return -1;
	}
    }

  if ((size_t)wchar >= 0x100)
    {
      r->_errno = EILSEQ;
      return -1;
    }

  *s = (char) wchar;
  return 1;
}
#endif /* _MB_EXTENDED_CHARSETS_ISO */

#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
/* Windows codepage converter.

   Returns 0 when s is NULL, 1 after storing one byte at *s, and -1 with
   r->_errno set to EILSEQ when the wide character has no representation.

   Values below 0x80 are stored directly.  For values from 0x80 up, the
   charset is looked up with __cp_index (charset + 2); if that yields a
   table index, row cp_idx of __cp_conv is searched for the wide character
   and the matching position (plus 0x80) is the output byte.  If no table
   index is returned, values below 0x100 are stored directly and larger
   ones are rejected.

   NOTE(review): charset + 2 presumably skips a "CP" prefix; confirm
   against the callers that install this converter.  state is not
   used.  */
int
_DEFUN (__cp_wctomb, (r, s, wchar, charset, state),
        struct _reent *r       _AND
        char          *s       _AND
        wchar_t        _wchar  _AND
	const char    *charset _AND
        mbstate_t     *state)
{
  wint_t wchar = _wchar;

  if (s == NULL)
    return 0;

  if (wchar >= 0x80)
    {
      int cp_idx = __cp_index (charset + 2);
      if (cp_idx >= 0)
	{
	  unsigned char mb;

	  /* s is known to be non-NULL here, so no second check is
	     needed before writing through it.  */
	  for (mb = 0; mb < 0x80; ++mb)
	    if (__cp_conv[cp_idx][mb] == wchar)
	      {
		*s = (char) (mb + 0x80);
		return 1;
	      }
	  r->_errno = EILSEQ;
	  return -1;
	}
    }

  if ((size_t)wchar >= 0x100)
    {
      r->_errno = EILSEQ;
      return -1;
    }

  *s = (char) wchar;
  return 1;
}
#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
#endif /* _MB_CAPABLE */