From 04c2b839f3cce7eb14e997061d512183a8f0cbc5 Mon Sep 17 00:00:00 2001 From: lex <Алексей Михайлов> Date: Wed, 4 Mar 2026 15:41:49 +0300 Subject: [PATCH 1/2] Update iconv UTF-16LE support and fix --- programs/develop/libraries/iconv/iconv.c | 14 ++-- programs/develop/libraries/iconv/utf16le.h | 79 ++++++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 programs/develop/libraries/iconv/utf16le.h diff --git a/programs/develop/libraries/iconv/iconv.c b/programs/develop/libraries/iconv/iconv.c index 4c4d23c84..3a43c7226 100644 --- a/programs/develop/libraries/iconv/iconv.c +++ b/programs/develop/libraries/iconv/iconv.c @@ -24,6 +24,7 @@ typedef int iconv_t; #define KOI8_RU 3 #define ISO8859_5 4 #define UTF_8 5 +#define UTF_16LE 6 int strcmp (const char* a, const char* b) { return (*a && *b && (*a == *b)) ? ((*(a+1) || *(b+1)) ? (strcmp(a+1, b+1)) : (0)) : ((*a > *b) ? (1) : (-1)); @@ -35,6 +36,7 @@ int strcmp (const char* a, const char* b) { #include "koi8_ru.h" #include "iso8859_5.h" #include "utf8.h" +#include "utf16le.h" int encoding(const char *what) { if (!strcmp(what,"CP866")) return CP866; @@ -43,6 +45,7 @@ int encoding(const char *what) { if (!strcmp(what,"KOI8-RU")) return KOI8_RU; if (!strcmp(what,"ISO8859-5")) return ISO8859_5; if (!strcmp(what,"UTF-8")) return UTF_8; + if (!strcmp(what,"UTF-16LE")) return UTF_16LE; return -1; } @@ -64,8 +67,8 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, char *str; str=*outbuf; - from=cd>>16; - to=cd&0xFFFF; + to=cd>>16; + from=cd&0xFFFF; switch (from) { @@ -75,6 +78,7 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, case ISO8859_5: mbtowc=iso8859_5_mbtowc; break; case KOI8_RU: mbtowc=koi8_ru_mbtowc; break; case UTF_8: mbtowc=utf8_mbtowc; break; + case UTF_16LE: mbtowc=utf16le_mbtowc; break; default: return -2; } @@ -86,6 +90,7 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, case 
ISO8859_5: wctomb=iso8859_5_wctomb; break; case KOI8_RU: wctomb=koi8_ru_wctomb; break; case UTF_8: wctomb=utf8_wctomb; break; + case UTF_16LE: wctomb=utf16le_wctomb; break; default: return -3; } @@ -101,12 +106,13 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, // printf("%d\n",n); converted = (mbtowc)(0,&pwc,((*inbuf)+count1),n); + if (converted==RET_TOOFEW(0) && n>=(int)*inbytesleft) break; n++; } while (converted==RET_TOOFEW(0)); - if (converted<0) return -10; + if (converted<0) { *(str+count2)='\0'; return -10; } //written= (cp866_wctomb)(0,str+count2,pwc,1); - written = (wctomb)(0,str+count2,pwc,1); + written = (wctomb)(0,str+count2,pwc,*outbytesleft); if (written<0) written=0;//return -11; //printf("Conv:%d Wri:%d In:%d Out:%d UTF:%x UCS:%x 866:%s\n",converted, written, *inbytesleft,*outbytesleft,*((*inbuf)+count1),pwc, str); diff --git a/programs/develop/libraries/iconv/utf16le.h b/programs/develop/libraries/iconv/utf16le.h new file mode 100644 index 000000000..97e94bc59 --- /dev/null +++ b/programs/develop/libraries/iconv/utf16le.h @@ -0,0 +1,79 @@ +/* + * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc. + * This file is part of the GNU LIBICONV Library. + * + * The GNU LIBICONV Library is free software; you can redistribute it + * and/or modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * The GNU LIBICONV Library is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with the GNU LIBICONV Library; see the file COPYING.LIB. + * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*
+ * UTF-16LE
+ */
+
+/* Specification: RFC 2781 */
+/* Decode one UTF-16LE character from the n bytes at s into *pwc. Returns the number of bytes consumed (2, or 4 for a surrogate pair), RET_TOOFEW(count) when more input is needed, or RET_SHIFT_ILSEQ(count) for an invalid surrogate sequence. The RET_* macros are defined outside this file. */
+static int
+utf16le_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n) /* conv is not referenced in the body */
+{
+  int count = 0; /* never modified below, so count+2 and count+4 evaluate to 2 and 4 */
+  if (n >= 2) {
+    ucs4_t wc = s[0] + (s[1] << 8); /* little-endian: low byte first */
+    if (wc >= 0xd800 && wc < 0xdc00) { /* high surrogate: a low surrogate must follow */
+      if (n >= 4) {
+        ucs4_t wc2 = s[2] + (s[3] << 8);
+        if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) /* second unit is not a low surrogate */
+          goto ilseq;
+        *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); /* combine the two 10-bit halves */
+        return count+4;
+      } /* n < 4: falls through to the RET_TOOFEW return below */
+    } else if (wc >= 0xdc00 && wc < 0xe000) { /* low surrogate without a preceding high surrogate */
+      goto ilseq;
+    } else {
+      *pwc = wc; /* single 16-bit unit outside the surrogate range */
+      return count+2;
+    }
+  }
+  return RET_TOOFEW(count); /* fewer than 2 bytes, or a high surrogate with fewer than 4 bytes */
+
+ilseq:
+  return RET_SHIFT_ILSEQ(count);
+}
+/* Encode wc as UTF-16LE into r, which has room for n bytes. Returns the number of bytes written (2, or 4 as a surrogate pair), RET_TOOSMALL when n is too small, or RET_ILUNI when wc lies in 0xd800..0xdfff or is not below 0x110000. The RET_* macros are defined outside this file. */
+static int
+utf16le_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n) /* conv is not referenced in the body */
+{
+  if (!(wc >= 0xd800 && wc < 0xe000)) { /* surrogate code points are rejected */
+    if (wc < 0x10000) {
+      if (n >= 2) {
+        r[0] = (unsigned char) wc; /* low byte first */
+        r[1] = (unsigned char) (wc >> 8);
+        return 2;
+      } else
+        return RET_TOOSMALL;
+    }
+    else if (wc < 0x110000) {
+      if (n >= 4) {
+        ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); /* high surrogate: upper 10 bits of the offset */
+        ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); /* low surrogate: lower 10 bits of the offset */
+        r[0] = (unsigned char) wc1;
+        r[1] = (unsigned char) (wc1 >> 8);
+        r[2] = (unsigned char) wc2;
+        r[3] = (unsigned char) (wc2 >> 8);
+        return 4;
+      } else
+        return RET_TOOSMALL;
+    }
+  }
+  return RET_ILUNI;
+}
-- 
2.49.1

From 0a6f1a772919bd1247c6bfb6339ed67f03fa3500 Mon Sep 17 00:00:00 2001
From: lex <Алексей Михайлов>
Date: Wed, 4 Mar 2026 16:04:17 +0300
Subject: [PATCH 2/2] return fix

---
 programs/develop/libraries/iconv/iconv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/programs/develop/libraries/iconv/iconv.c b/programs/develop/libraries/iconv/iconv.c
index 3a43c7226..d49a10343 100644
--- a/programs/develop/libraries/iconv/iconv.c
+++ b/programs/develop/libraries/iconv/iconv.c
@@ -110,10 +110,10 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf,
         n++;
     } while (converted==RET_TOOFEW(0));
 
-    if (converted<0) { *(str+count2)='\0'; return -10; }
+    if (converted<0) return -10;
//written= (cp866_wctomb)(0,str+count2,pwc,1); written = (wctomb)(0,str+count2,pwc,*outbytesleft); - if (written<0) written=0;//return -11; + if (written<0) written=0;//return -11; //printf("Conv:%d Wri:%d In:%d Out:%d UTF:%x UCS:%x 866:%s\n",converted, written, *inbytesleft,*outbytesleft,*((*inbuf)+count1),pwc, str); -- 2.49.1