Add UTF-16LE support to iconv and fix from/to encoding extraction

This commit is contained in:
lex
2026-03-04 15:41:49 +03:00
parent cc1034d849
commit 04c2b839f3
2 changed files with 89 additions and 4 deletions

View File

@@ -24,6 +24,7 @@ typedef int iconv_t;
#define KOI8_RU 3
#define ISO8859_5 4
#define UTF_8 5
#define UTF_16LE 6
int strcmp (const char* a, const char* b) {
return (*a && *b && (*a == *b)) ? ((*(a+1) || *(b+1)) ? (strcmp(a+1, b+1)) : (0)) : ((*a > *b) ? (1) : (-1));
@@ -35,6 +36,7 @@ int strcmp (const char* a, const char* b) {
#include "koi8_ru.h"
#include "iso8859_5.h"
#include "utf8.h"
#include "utf16le.h"
int encoding(const char *what) {
if (!strcmp(what,"CP866")) return CP866;
@@ -43,6 +45,7 @@ int encoding(const char *what) {
if (!strcmp(what,"KOI8-RU")) return KOI8_RU;
if (!strcmp(what,"ISO8859-5")) return ISO8859_5;
if (!strcmp(what,"UTF-8")) return UTF_8;
if (!strcmp(what,"UTF-16LE")) return UTF_16LE;
return -1;
}
@@ -64,8 +67,8 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf,
char *str;
str=*outbuf;
from=cd>>16;
to=cd&0xFFFF;
to=cd>>16;
from=cd&0xFFFF;
switch (from)
{
@@ -75,6 +78,7 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf,
case ISO8859_5: mbtowc=iso8859_5_mbtowc; break;
case KOI8_RU: mbtowc=koi8_ru_mbtowc; break;
case UTF_8: mbtowc=utf8_mbtowc; break;
case UTF_16LE: mbtowc=utf16le_mbtowc; break;
default: return -2;
}
@@ -86,6 +90,7 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf,
case ISO8859_5: wctomb=iso8859_5_wctomb; break;
case KOI8_RU: wctomb=koi8_ru_wctomb; break;
case UTF_8: wctomb=utf8_wctomb; break;
case UTF_16LE: wctomb=utf16le_wctomb; break;
default: return -3;
}
@@ -101,12 +106,13 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf,
// printf("%d\n",n);
converted = (mbtowc)(0,&pwc,((*inbuf)+count1),n);
if (converted==RET_TOOFEW(0) && n>=(int)*inbytesleft) break;
n++;
} while (converted==RET_TOOFEW(0));
if (converted<0) return -10;
if (converted<0) { *(str+count2)='\0'; return -10; }
//written= (cp866_wctomb)(0,str+count2,pwc,1);
written = (wctomb)(0,str+count2,pwc,1);
written = (wctomb)(0,str+count2,pwc,*outbytesleft);
if (written<0) written=0;//return -11;
//printf("Conv:%d Wri:%d In:%d Out:%d UTF:%x UCS:%x 866:%s\n",converted, written, *inbytesleft,*outbytesleft,*((*inbuf)+count1),pwc, str);

View File

@@ -0,0 +1,79 @@
/*
* Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
* This file is part of the GNU LIBICONV Library.
*
* The GNU LIBICONV Library is free software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* The GNU LIBICONV Library is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with the GNU LIBICONV Library; see the file COPYING.LIB.
* If not, see <https://www.gnu.org/licenses/>.
*/
/*
* UTF-16LE
*/
/* Specification: RFC 2781 */
/*
 * Decode one UTF-16LE character from the n bytes starting at s.
 *
 * conv  conversion state; not used by this decoder.
 * pwc   receives the decoded code point on success.
 * s     input bytes, least significant byte of each 16-bit unit first.
 * n     number of bytes available at s.
 *
 * Returns the number of input bytes consumed (2 for a single code unit,
 * 4 for a surrogate pair), RET_TOOFEW(0) when the input ends before a
 * complete character, or RET_SHIFT_ILSEQ(0) when the input starts with a
 * low surrogate or a high surrogate is not followed by a low surrogate.
 */
static int
utf16le_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
{
  ucs4_t lead;
  ucs4_t trail;

  /* Not even one 16-bit code unit available. */
  if (n < 2)
    return RET_TOOFEW(0);

  lead = s[0] | (s[1] << 8);

  /* A low surrogate may never start a character. */
  if (lead >= 0xdc00 && lead < 0xe000)
    return RET_SHIFT_ILSEQ(0);

  /* Anything that is not a high surrogate stands for itself. */
  if (lead < 0xd800 || lead >= 0xdc00) {
    *pwc = lead;
    return 2;
  }

  /* High surrogate: the second code unit is required. */
  if (n < 4)
    return RET_TOOFEW(0);

  trail = s[2] | (s[3] << 8);
  if (trail < 0xdc00 || trail >= 0xe000)
    return RET_SHIFT_ILSEQ(0);

  /* Combine the two 10-bit halves into a supplementary code point. */
  *pwc = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
  return 4;
}
/*
 * Encode the code point wc as UTF-16LE into the buffer r.
 *
 * conv  conversion state; not used by this encoder.
 * r     output buffer.
 * wc    code point to encode.
 * n     number of bytes available at r.
 *
 * Returns the number of bytes written (2, or 4 for a surrogate pair),
 * RET_TOOSMALL when n is too small for the encoded form, or RET_ILUNI
 * when wc lies in the surrogate range 0xd800..0xdfff or is 0x110000 or
 * greater.
 */
static int
utf16le_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
{
  ucs4_t offset;
  ucs4_t high;
  ucs4_t low;

  /* Surrogates and values past the last code point cannot be encoded. */
  if ((wc >= 0xd800 && wc < 0xe000) || wc >= 0x110000)
    return RET_ILUNI;

  /* Code points below 0x10000 fit in a single 16-bit unit. */
  if (wc < 0x10000) {
    if (n < 2)
      return RET_TOOSMALL;
    r[0] = (unsigned char) wc;
    r[1] = (unsigned char) (wc >> 8);
    return 2;
  }

  /* Everything else is written as a high/low surrogate pair. */
  if (n < 4)
    return RET_TOOSMALL;

  offset = wc - 0x10000;
  high = 0xd800 + (offset >> 10);
  low = 0xdc00 + (offset & 0x3ff);

  r[0] = (unsigned char) high;
  r[1] = (unsigned char) (high >> 8);
  r[2] = (unsigned char) low;
  r[3] = (unsigned char) (low >> 8);
  return 4;
}