From 9378825c030b03c05b6890127e52a211296f40b1 Mon Sep 17 00:00:00 2001 From: hidnplayr Date: Tue, 10 Oct 2017 21:54:05 +0000 Subject: [PATCH] Iconv: add proper KOI8-R support. git-svn-id: svn://kolibrios.org@7084 a494cfbc-eb01-0410-851d-a64ba20cac60 --- programs/develop/libraries/iconv/iconv.c | 8 +- programs/develop/libraries/iconv/koi8_r.h | 153 ++++++++++++++++++++++ 2 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 programs/develop/libraries/iconv/koi8_r.h diff --git a/programs/develop/libraries/iconv/iconv.c b/programs/develop/libraries/iconv/iconv.c index 45cdd506ee..91e29500d0 100644 --- a/programs/develop/libraries/iconv/iconv.c +++ b/programs/develop/libraries/iconv/iconv.c @@ -25,10 +25,12 @@ typedef int iconv_t; #define KOI8_RU 3 #define ISO8859_5 4 #define UTF_8 5 +#define KOI8_R 6 #include "cp866.h" #include "cp1251.h" #include "cp1252.h" +#include "koi8_r.h" #include "koi8_ru.h" #include "iso8859_5.h" #include "utf8.h" @@ -48,10 +50,10 @@ int encoding(const char *someencoding) { if (!strcasecmp(what,"CP866")) return CP866; if (!strcasecmp(what,"CP1251")) return CP1251; if (!strcasecmp(what,"windows-1251")) return CP1251; - if (!strcasecmp(what,"windows-1252")) return CP1252; if (!strcasecmp(what,"CP1252")) return CP1252; + if (!strcasecmp(what,"windows-1252")) return CP1252; + if (!strcasecmp(what,"KOI8-R")) return KOI8_R; if (!strcasecmp(what,"KOI8-RU")) return KOI8_RU; - if (!strcasecmp(what,"KOI8-R")) return KOI8_RU; if (!strcasecmp(what,"ISO8859-5")) return ISO8859_5; if (!strcasecmp(what,"UTF-8")) return UTF_8; return -1; @@ -93,6 +95,7 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, case CP1251: mbtowc=cp1251_mbtowc; break; case CP1252: mbtowc=cp1252_mbtowc; break; case ISO8859_5: mbtowc=iso8859_5_mbtowc; break; + case KOI8_R: mbtowc=koi8_r_mbtowc; break; case KOI8_RU: mbtowc=koi8_ru_mbtowc; break; case UTF_8: mbtowc=utf8_mbtowc; break; default: return (size_t)-1; @@ -104,6 +107,7 @@ size_t iconv(iconv_t 
cd, const char **inbuf, size_t *inbytesleft, case CP1251: wctomb=cp1251_wctomb; break; case CP1252: wctomb=cp1252_wctomb; break; case ISO8859_5: wctomb=iso8859_5_wctomb; break; + case KOI8_R: wctomb=koi8_r_wctomb; break; case KOI8_RU: wctomb=koi8_ru_wctomb; break; case UTF_8: wctomb=utf8_wctomb; break; default: return (size_t)-1; diff --git a/programs/develop/libraries/iconv/koi8_r.h b/programs/develop/libraries/iconv/koi8_r.h new file mode 100644 index 0000000000..e4d93ee3b3 --- /dev/null +++ b/programs/develop/libraries/iconv/koi8_r.h @@ -0,0 +1,153 @@ +/* + * Copyright (C) 1999-2001 Free Software Foundation, Inc. + * This file is part of the GNU LIBICONV Library. + * + * The GNU LIBICONV Library is free software; you can redistribute it + * and/or modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * The GNU LIBICONV Library is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with the GNU LIBICONV Library; see the file COPYING.LIB. + * If not, write to the Free Software Foundation, Inc., 59 Temple Place - + * Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +/* + * KOI8-R + */ + +/* Specification: RFC 1489 */ + +static const unsigned short koi8_r_2uni[128] = { /* decode table read by koi8_r_mbtowc: index is (byte - 0x80), value is what gets stored into *pwc */ + /* 0x80 */ + 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524, + 0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, + /* 0x90 */ + 0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248, + 0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7, + /* 0xa0 */ + 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556, + 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e, + /* 0xb0 */ + 0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9, + /* 0xc0 */ + 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, + /* 0xd0 */ + 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a, + /* 0xe0 */ + 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, + /* 0xf0 */ + 0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a, +}; + +static int +koi8_r_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) /* Decodes the single byte at s into *pwc. conv and n are not used, so s must point at a readable byte. Always returns 1 (presumably the number of input bytes consumed - confirm against the caller in iconv.c). */ +{ + unsigned char c = *s; + if (c < 0x80) /* bytes below 0x80 are stored unchanged */ + *pwc = (ucs4_t) c; + else /* bytes 0x80..0xff are translated through koi8_r_2uni */ + *pwc = (ucs4_t) koi8_r_2uni[c-0x80]; + return 1; +} + +static const unsigned char koi8_r_page00[88] = { /* encode table read by koi8_r_wctomb for wc 0x00a0..0x00f7, indexed by (wc - 0x00a0); 0x00 means no mapping */ + 0x9a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ + 0x00, 0xbf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ + 0x9c, 0x00, 0x9d, 0x00, 0x00, 0x00, 0x00, 0x9e, /* 0xb0-0xb7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 
*/ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9f, /* 0xf0-0xf7 */ +}; +static const unsigned char koi8_r_page04[88] = { /* encode table read by koi8_r_wctomb for wc 0x0400..0x0457, indexed by (wc - 0x0400); 0x00 means no mapping */ + 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ + 0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa, /* 0x10-0x17 */ + 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, /* 0x18-0x1f */ + 0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe, /* 0x20-0x27 */ + 0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1, /* 0x28-0x2f */ + 0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda, /* 0x30-0x37 */ + 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, /* 0x38-0x3f */ + 0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde, /* 0x40-0x47 */ + 0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1, /* 0x48-0x4f */ + 0x00, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ +}; +static const unsigned char koi8_r_page22[80] = { /* encode table read by koi8_r_wctomb for wc 0x2218..0x2267, indexed by (wc - 0x2218); 0x00 means no mapping */ + 0x00, 0x95, 0x96, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ + 0x97, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ + 0x00, 0x00, 0x00, 0x00, 0x98, 0x99, 0x00, 0x00, /* 0x60-0x67 */ +}; +static const unsigned char koi8_r_page23[8] = { /* encode table read by koi8_r_wctomb for wc 0x2320..0x2327, indexed by (wc - 0x2320); 0x00 means no mapping */ + 0x93, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ +}; +static const unsigned char koi8_r_page25[168] = { /* encode table read by koi8_r_wctomb for wc 0x2500..0x25a7, indexed by (wc - 0x2500); 0x00 means no mapping */ + 0x80, 0x00, 0x81, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 
*/ + 0x00, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, /* 0x08-0x0f */ + 0x83, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, /* 0x10-0x17 */ + 0x85, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, /* 0x18-0x1f */ + 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, /* 0x20-0x27 */ + 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, /* 0x28-0x2f */ + 0x00, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, /* 0x30-0x37 */ + 0x00, 0x00, 0x00, 0x00, 0x8a, 0x00, 0x00, 0x00, /* 0x38-0x3f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ + 0xa0, 0xa1, 0xa2, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, /* 0x50-0x57 */ + 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, /* 0x58-0x5f */ + 0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, /* 0x60-0x67 */ + 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0x00, 0x00, 0x00, /* 0x68-0x6f */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ + 0x8b, 0x00, 0x00, 0x00, 0x8c, 0x00, 0x00, 0x00, /* 0x80-0x87 */ + 0x8d, 0x00, 0x00, 0x00, 0x8e, 0x00, 0x00, 0x00, /* 0x88-0x8f */ + 0x8f, 0x90, 0x91, 0x92, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ + 0x94, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ +}; + +static int +koi8_r_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) /* Encodes wc as a single byte written to *r and returns 1, or returns RET_ILUNI without writing when wc has no table entry. conv and n are not used, so r must have room for one byte. */ +{ + unsigned char c = 0; /* stays 0 when wc falls outside every table range */ + if (wc < 0x0080) { /* values below 0x80 are stored unchanged */ + *r = wc; + return 1; + } + else if (wc >= 0x00a0 && wc < 0x00f8) + c = koi8_r_page00[wc-0x00a0]; + else if (wc >= 0x0400 && wc < 0x0458) + c = koi8_r_page04[wc-0x0400]; + else if (wc >= 0x2218 && wc < 0x2268) + c = koi8_r_page22[wc-0x2218]; + else if (wc >= 0x2320 && wc < 0x2328) + c = koi8_r_page23[wc-0x2320]; + else if (wc >= 0x2500 && wc < 0x25a8) + c = koi8_r_page25[wc-0x2500]; + if (c != 0) { /* a 0x00 table entry is treated the same as an out-of-range wc */ + *r = c; + return 1; + } + return RET_ILUNI; +}