diff --git a/programs/develop/libraries/iconv/iconv.c b/programs/develop/libraries/iconv/iconv.c index 9a855cc06d..b502fc9936 100644 --- a/programs/develop/libraries/iconv/iconv.c +++ b/programs/develop/libraries/iconv/iconv.c @@ -1,14 +1,11 @@ #include -//#include -typedef unsigned int size_t; -#define NULL ((void*)0) +#include +#include typedef int conv_t; typedef unsigned int ucs4_t; - typedef int iconv_t; - /* Return code if invalid input after a shift sequence of n bytes was read. (xxx_mbtowc) */ #define RET_SHIFT_ILSEQ(n) (-1-2*(n)) @@ -22,7 +19,6 @@ typedef int iconv_t; /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ #define RET_TOOSMALL -2 - #define CP866 0 #define CP1251 1 #define CP1252 2 @@ -37,8 +33,9 @@ typedef int iconv_t; #include "iso8859_5.h" #include "utf8.h" -int encoding(char *what) { +int encoding(const char *someencoding) { + char *what = strdup(someencoding); /* Ignore //TRANSLIT or //IGNORE for now. */ int i; for(i = 0; i < strlen(what); i++) { @@ -50,6 +47,7 @@ int encoding(char *what) { if (!strcasecmp(what,"CP866")) return CP866; if (!strcasecmp(what,"CP1251")) return CP1251; + if (!strcasecmp(what,"windows-1251")) return CP1251; if (!strcasecmp(what,"windows-1252")) return CP1252; if (!strcasecmp(what,"CP1252")) return CP1252; if (!strcasecmp(what,"KOI8-RU")) return KOI8_RU; @@ -58,7 +56,6 @@ int encoding(char *what) { return -1; } - iconv_t iconv_open(const char *tocode, const char *fromcode) { int to, from; @@ -67,6 +64,7 @@ iconv_t iconv_open(const char *tocode, const char *fromcode) { to=to<<16&0xFFFF0000; from=from&0xFFFF; + return to+from; } @@ -78,17 +76,16 @@ int iconv_close(iconv_t icd) size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) { - int n, to, from, count1,count2; - int pwc, converted,written; + int n, to, from; + size_t count1,count2; + unsigned int pwc; + int converted,written; int (*mbtowc)(conv_t, ucs4_t *, const unsigned char *, int); - int (*wctomb)(conv_t, ucs4_t *, const unsigned char *, int); - - char *str; - str=*outbuf; - - from=cd>>16; - to=cd&0xFFFF; - + int (*wctomb)(conv_t, unsigned char *, ucs4_t, int); + + to=cd>>16; + from=cd&0xFFFF; + switch (from) { case CP866: mbtowc=cp866_mbtowc; break; @@ -97,9 +94,9 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, case ISO8859_5: mbtowc=iso8859_5_mbtowc; break; case KOI8_RU: mbtowc=koi8_ru_mbtowc; break; case UTF_8: mbtowc=utf8_mbtowc; break; - default: return -2; + default: return (size_t)-1; } - + switch (to) { case CP866: wctomb=cp866_wctomb; break; @@ -108,126 +105,155 @@ size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, case ISO8859_5: wctomb=iso8859_5_wctomb; break; case KOI8_RU: wctomb=koi8_ru_wctomb; break; case UTF_8: wctomb=utf8_wctomb; break; - default: return -3; + default: return (size_t)-1; } - if(from == to) { - int oc=0,ic=0; - - while(*inbytesleft > 0 && *outbytesleft > 0) { - str[oc]=(*inbuf)[ic]; - ++ic; - ++oc; - (*inbytesleft)--; - (*outbytesleft)--; - (*outbuf)++; - } - - return 0; - } - count1=0; count2=0; - - while ( *inbytesleft>0 && *outbytesleft>1) - { - n=1; - - do { - //converted= (utf8_mbtowc)(0,&pwc,((*inbuf)+count1),n); - // printf("%d\n",n); - converted= (mbtowc)(0,&pwc,((*inbuf)+count1),n); - - n++; - } while (converted==RET_TOOFEW(0)); - - if (converted<0) return -10; - //written= (cp866_wctomb)(0,str+count2,pwc,1); - written= (wctomb)(0,str+count2,pwc,1); - if (written<0) written=0;//return -11; - - //printf("Conv:%d Wri:%d In:%d Out:%d UTF:%x UCS:%x 866:%s\n",converted, written, *inbytesleft,*outbytesleft,*((*inbuf)+count1),pwc, str); - - (*inbytesleft)-=converted; - (*outbytesleft)-=written; - (*outbuf)+=written; - count1+=converted; - count2+=written; - } - *(str+count2)='\0'; - - if (*inbytesleft>0 && *outbytesleft==0) return -12; - return 0; + + /* Convert input multibyte char to wide character by using calls to mbtowc */ + /* Convert wide character to multibyte by calls to wctomb */ + /* Handle errors as we go on converting to be as standard compliant as possible */ + while(count1 < *inbytesleft) { + unsigned char mbholder[] = { 0,0,0,0,0,0 }; + + int numbytes = (mbtowc)(0, &pwc,((*inbuf)+count1), *inbytesleft - count1); + if(numbytes < 0) { + /* errno = EILSEQ if invalid multibyte sequence encountered in input */ + /* errno = EINVAL if input ends in the middle of a multibyte sequence */ + + switch(numbytes) { + case RET_TOOFEW(0): + errno = EINVAL; + break; + + case RET_ILSEQ: + errno = EILSEQ; + break; + } + + *inbytesleft -= count1; + *outbytesleft -= count2; + *inbuf += count1; + *outbuf += count2; + return (size_t) -1; + } + + /* Convert from wide to multibyte storing result in mbholder and num converted in numbytes2 */ + /* Pass the minimum amount of space we have, one from mbholder and one from remaining in outbuf */ + int minspace = sizeof(mbholder) <= (*outbytesleft - count2) ? sizeof(mbholder) : (*outbytesleft - count2); + + int numbytes2 = (wctomb)(0, &mbholder[0], pwc, minspace); + if(numbytes2 < 0) { + switch(numbytes2) { + case RET_ILUNI: + errno = EILSEQ; + break; + case RET_TOOSMALL: + errno = E2BIG; + break; + } + + *inbytesleft -= count1; + *outbytesleft -= count2; + *inbuf += count1; + *outbuf += count2; + + return (size_t) -1; + } + + int i; + for(i = 0; i < numbytes2; i++) { + *(*outbuf + count2 + i) = mbholder[i]; + } + + count1+=numbytes; + count2+=numbytes2; + } + + /* Successfully converted everything, update the variables and return number of bytes converted */ + *inbytesleft -= count1; + *outbytesleft -= count2; + *inbuf += count1; + *outbuf += count2; + + return count1; } +/* int main() */ +/* { */ +/* char *s;// ="вертолет"; */ +/* char *z; */ +/* //unsigned int pwc; */ +/* iconv_t cd; */ +/* size_t in, out; */ -/* -int main() -{ - char *s;// ="вертолет"; - char *z; - //unsigned int pwc; - iconv_t cd; - int in, out; - - FILE *infile; - char *fname = "file.txt"; - - infile = fopen(fname,"r"); - - fseek(infile, 0, SEEK_END); - size_t file_size = ftell(infile); - rewind(infile); +/* FILE *infile; */ +/* char *fname = "file3.txt"; */ - //printf ("LOL\n"); +/* size_t testmax = 100; */ +/* size_t test = 0; */ - char *buffer = (char*)malloc(file_size * sizeof(char)); - if (buffer == NULL) - { - fclose(infile); - printf("Error allocating %d bytes.\n", file_size * sizeof(char)); - return -1; - } - size_t bytes_read = fread(buffer, sizeof(char), file_size, infile); - if (bytes_read != file_size) - { - printf("Have read only %d bytes of %d.\n", bytes_read, file_size); - free(buffer); - fclose(infile); - return -1; - } - - in=strlen(buffer); - z=malloc(in+1); - - out=in+1; - cd=iconv_open("CP1251","CP866"); -// printf("%x\n",cd); - int t; - t=iconv(cd, &buffer, &in, &z, &out); - printf("\nResult: %d", t); - puts(z); - //for (;s